diff --git "a/result.json" "b/result.json" deleted file mode 100644--- "a/result.json" +++ /dev/null @@ -1,102426 +0,0 @@ -{ - "timestamp_utc": "2025-12-08T20:00:25.887398+00:00", - "bench_binary": "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "system": { - "hostname": "ip-172-31-82-82", - "platform": "Linux-6.14.0-1015-aws-x86_64-with-glibc2.39", - "python": "3.12.3", - "cpu_count": 2, - "cpu_info": { - "lscpu": [ - { - "field": "Architecture:", - "data": "x86_64" - }, - { - "field": "CPU op-mode(s):", - "data": "32-bit, 64-bit" - }, - { - "field": "Address sizes:", - "data": "46 bits physical, 48 bits virtual" - }, - { - "field": "Byte Order:", - "data": "Little Endian" - }, - { - "field": "CPU(s):", - "data": "2" - }, - { - "field": "On-line CPU(s) list:", - "data": "0,1" - }, - { - "field": "Vendor ID:", - "data": "GenuineIntel" - }, - { - "field": "Model name:", - "data": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz" - }, - { - "field": "CPU family:", - "data": "6" - }, - { - "field": "Model:", - "data": "79" - }, - { - "field": "Thread(s) per core:", - "data": "1" - }, - { - "field": "Core(s) per socket:", - "data": "2" - }, - { - "field": "Socket(s):", - "data": "1" - }, - { - "field": "Stepping:", - "data": "1" - }, - { - "field": "BogoMIPS:", - "data": "4600.03" - }, - { - "field": "Flags:", - "data": "fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx rdtscp lm constant_tsc rep_good nopl xtopology cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm cpuid_fault pti fsgsbase bmi1 avx2 smep bmi2 erms invpcid xsaveopt" - }, - { - "field": "Hypervisor vendor:", - "data": "Xen" - }, - { - "field": "Virtualization type:", - "data": "full" - }, - { - "field": "L1d cache:", - "data": "64 KiB (2 instances)" - }, - { - "field": "L1i cache:", - "data": "64 KiB (2 instances)" - }, - { - "field": "L2 cache:", - "data": "512 KiB (2 instances)" - }, - { - "field": "L3 cache:", - "data": "45 MiB (1 instance)" - }, - { - "field": "NUMA node(s):", - "data": "1" - }, - { - "field": "NUMA node0 CPU(s):", - "data": "0,1" - }, - { - "field": "Vulnerability Gather data sampling:", - "data": "Not affected" - }, - { - "field": "Vulnerability Ghostwrite:", - "data": "Not affected" - }, - { - "field": "Vulnerability Indirect target selection:", - "data": "Mitigation; Aligned branch/return thunks" - }, - { - "field": "Vulnerability Itlb multihit:", - "data": "KVM: Mitigation: VMX unsupported" - }, - { - "field": "Vulnerability L1tf:", - "data": "Mitigation; PTE Inversion" - }, - { - "field": "Vulnerability Mds:", - "data": "Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown" - }, - { - "field": "Vulnerability Meltdown:", - "data": "Mitigation; PTI" - }, - { - "field": "Vulnerability Mmio stale data:", - "data": "Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown" - }, - { - "field": "Vulnerability Reg file data sampling:", - "data": "Not affected" - }, - { - "field": "Vulnerability Retbleed:", - "data": "Not affected" - }, - { - "field": "Vulnerability Spec rstack overflow:", - "data": "Not affected" - }, - { - "field": "Vulnerability Spec store bypass:", - "data": "Vulnerable" - }, - { - "field": "Vulnerability Spectre v1:", - "data": "Mitigation; usercopy/swapgs barriers and __user pointer sanitization" - }, - { - "field": "Vulnerability Spectre v2:", - "data": "Mitigation; Retpolines; STIBP disabled; RSB filling; PBRSB-eIBRS Not affected; BHI Retpoline" - }, - { - "field": "Vulnerability Srbds:", - "data": "Not affected" - }, - { - "field": "Vulnerability Tsa:", - "data": "Not affected" - }, - { - "field": "Vulnerability Tsx async abort:", - "data": "Not affected" - } - ] - }, - "total_ram_bytes": 8323702784 - }, - "runs": [ - { - "timestamp_utc": "2025-12-08T20:03:33.571483+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:03:13Z\",\n \"avg_ns\": 1106500666,\n \"stddev_ns\": 4620157,\n \"avg_ts\": 115.681346,\n \"stddev_ts\": 0.482714,\n \"samples_ns\": [ 1111326820, 1102119132, 1106056048 ],\n \"samples_ts\": [ 115.178, 116.14, 115.727 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:03:18Z\",\n \"avg_ns\": 5133914572,\n \"stddev_ns\": 330441335,\n \"avg_ts\": 24.998817,\n \"stddev_ts\": 1.551529,\n \"samples_ns\": [ 4935630764, 5515375657, 4950737296 ],\n \"samples_ts\": [ 25.9339, 23.2078, 25.8547 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:03:13Z", - "avg_ns": 1106500666, - "stddev_ns": 4620157, - "avg_ts": 115.681346, - "stddev_ts": 0.482714, - "samples_ns": [ - 1111326820, - 1102119132, - 1106056048 - ], - "samples_ts": [ - 115.178, - 116.14, - 115.727 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:03:18Z", - "avg_ns": 5133914572, - "stddev_ns": 330441335, - "avg_ts": 24.998817, - "stddev_ts": 1.551529, - "samples_ns": [ - 4935630764, - 5515375657, - 4950737296 - ], - "samples_ts": [ - 25.9339, - 23.2078, - 25.8547 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 0 - }, - { - "timestamp_utc": "2025-12-08T20:04:40.957262+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:03:34Z\",\n \"avg_ns\": 1110582042,\n \"stddev_ns\": 2401701,\n \"avg_ts\": 115.255239,\n \"stddev_ts\": 0.248976,\n \"samples_ns\": [ 1113314164, 1109628053, 1108803909 ],\n \"samples_ts\": [ 114.972, 115.354, 115.44 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:03:38Z\",\n \"avg_ns\": 20757011766,\n \"stddev_ns\": 362169642,\n \"avg_ts\": 24.671421,\n \"stddev_ts\": 0.434801,\n \"samples_ns\": [ 20339338168, 20983970456, 20947726675 ],\n \"samples_ts\": [ 25.1729, 24.3996, 24.4418 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:03:34Z", - "avg_ns": 1110582042, - "stddev_ns": 2401701, - "avg_ts": 115.255239, - "stddev_ts": 0.248976, - "samples_ns": [ - 1113314164, - 1109628053, - 1108803909 - ], - "samples_ts": [ - 114.972, - 115.354, - 115.44 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:03:38Z", - "avg_ns": 20757011766, - "stddev_ns": 362169642, - "avg_ts": 24.671421, - "stddev_ts": 0.434801, - "samples_ns": [ - 20339338168, - 20983970456, - 20947726675 - ], - "samples_ts": [ - 25.1729, - 24.3996, - 24.4418 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 1 - }, - { - "timestamp_utc": "2025-12-08T20:05:16.724425+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:04:41Z\",\n \"avg_ns\": 4621011173,\n \"stddev_ns\": 327537615,\n \"avg_ts\": 111.155929,\n \"stddev_ts\": 7.569175,\n \"samples_ns\": [ 4435464928, 4428371729, 4999196862 ],\n \"samples_ts\": [ 115.433, 115.618, 102.416 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:05:00Z\",\n \"avg_ns\": 5417089273,\n \"stddev_ns\": 27683136,\n \"avg_ts\": 23.629337,\n \"stddev_ts\": 0.120778,\n \"samples_ns\": [ 5444432323, 5417757352, 5389078144 ],\n \"samples_ts\": [ 23.5103, 23.626, 23.7517 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:04:41Z", - "avg_ns": 4621011173, - "stddev_ns": 327537615, - "avg_ts": 111.155929, - "stddev_ts": 7.569175, - "samples_ns": [ - 4435464928, - 4428371729, - 4999196862 - ], - "samples_ts": [ - 115.433, - 115.618, - 102.416 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:05:00Z", - "avg_ns": 5417089273, - "stddev_ns": 27683136, - "avg_ts": 23.629337, - "stddev_ts": 0.120778, - "samples_ns": [ - 5444432323, - 5417757352, - 5389078144 - ], - "samples_ts": [ - 23.5103, - 23.626, - 23.7517 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 2 - }, - { - "timestamp_utc": "2025-12-08T20:06:37.135732+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:05:17Z\",\n \"avg_ns\": 4625402712,\n \"stddev_ns\": 313844497,\n \"avg_ts\": 111.020977,\n \"stddev_ts\": 7.249103,\n \"samples_ns\": [ 4446029219, 4987793001, 4442385918 ],\n \"samples_ts\": [ 115.159, 102.651, 115.253 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:05:36Z\",\n \"avg_ns\": 20294551729,\n \"stddev_ns\": 81452388,\n \"avg_ts\": 25.228716,\n \"stddev_ts\": 0.101102,\n \"samples_ns\": [ 20272299181, 20384817558, 20226538450 ],\n \"samples_ts\": [ 25.2561, 25.1167, 25.3133 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:05:17Z", - "avg_ns": 4625402712, - "stddev_ns": 313844497, - "avg_ts": 111.020977, - "stddev_ts": 7.249103, - "samples_ns": [ - 4446029219, - 4987793001, - 4442385918 - ], - "samples_ts": [ - 115.159, - 102.651, - 115.253 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:05:36Z", - "avg_ns": 20294551729, - "stddev_ns": 81452388, - "avg_ts": 25.228716, - "stddev_ts": 0.101102, - "samples_ns": [ - 20272299181, - 20384817558, - 20226538450 - ], - "samples_ts": [ - 25.2561, - 25.1167, - 25.3133 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 3 - }, - { - "timestamp_utc": "2025-12-08T20:06:57.110539+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:06:37Z\",\n \"avg_ns\": 1102196216,\n \"stddev_ns\": 2757588,\n \"avg_ts\": 116.132256,\n \"stddev_ts\": 0.290238,\n \"samples_ns\": [ 1101452244, 1099887351, 1105249055 ],\n \"samples_ts\": [ 116.21, 116.376, 115.811 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:06:42Z\",\n \"avg_ns\": 4960849973,\n \"stddev_ns\": 32319276,\n \"avg_ts\": 25.802763,\n \"stddev_ts\": 0.168670,\n \"samples_ns\": [ 4923975529, 4984261347, 4974313043 ],\n \"samples_ts\": [ 25.9953, 25.6808, 25.7322 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:06:37Z", - "avg_ns": 1102196216, - "stddev_ns": 2757588, - "avg_ts": 116.132256, - "stddev_ts": 0.290238, - "samples_ns": [ - 1101452244, - 1099887351, - 1105249055 - ], - "samples_ts": [ - 116.21, - 116.376, - 115.811 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:06:42Z", - "avg_ns": 4960849973, - "stddev_ns": 32319276, - "avg_ts": 25.802763, - "stddev_ts": 0.16867, - "samples_ns": [ - 4923975529, - 4984261347, - 4974313043 - ], - "samples_ts": [ - 25.9953, - 25.6808, - 25.7322 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 4 - }, - { - "timestamp_utc": "2025-12-08T20:08:03.116774+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:06:57Z\",\n \"avg_ns\": 1106049596,\n \"stddev_ns\": 2007084,\n \"avg_ts\": 115.727434,\n \"stddev_ts\": 0.210126,\n \"samples_ns\": [ 1106624293, 1103818135, 1107706361 ],\n \"samples_ts\": [ 115.667, 115.961, 115.554 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:07:02Z\",\n \"avg_ns\": 20297889904,\n \"stddev_ns\": 76909551,\n \"avg_ts\": 25.224539,\n \"stddev_ts\": 0.095694,\n \"samples_ns\": [ 20364590505, 20213762534, 20315316675 ],\n \"samples_ts\": [ 25.1417, 25.3293, 25.2027 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:06:57Z", - "avg_ns": 1106049596, - "stddev_ns": 2007084, - "avg_ts": 115.727434, - "stddev_ts": 0.210126, - "samples_ns": [ - 1106624293, - 1103818135, - 1107706361 - ], - "samples_ts": [ - 115.667, - 115.961, - 115.554 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:07:02Z", - "avg_ns": 20297889904, - "stddev_ns": 76909551, - "avg_ts": 25.224539, - "stddev_ts": 0.095694, - "samples_ns": [ - 20364590505, - 20213762534, - 20315316675 - ], - "samples_ts": [ - 25.1417, - 25.3293, - 25.2027 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 5 - }, - { - "timestamp_utc": "2025-12-08T20:08:36.623513+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:08:03Z\",\n \"avg_ns\": 4476377813,\n \"stddev_ns\": 2314148,\n \"avg_ts\": 114.378212,\n \"stddev_ts\": 0.059063,\n \"samples_ns\": [ 4474864022, 4475230069, 4479039350 ],\n \"samples_ts\": [ 114.417, 114.408, 114.31 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:08:21Z\",\n \"avg_ns\": 4959751842,\n \"stddev_ns\": 28627467,\n \"avg_ts\": 25.808316,\n \"stddev_ts\": 0.149088,\n \"samples_ns\": [ 4929853106, 4962491743, 4986910679 ],\n \"samples_ts\": [ 25.9643, 25.7935, 25.6672 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:08:03Z", - "avg_ns": 4476377813, - "stddev_ns": 2314148, - "avg_ts": 114.378212, - "stddev_ts": 0.059063, - "samples_ns": [ - 4474864022, - 4475230069, - 4479039350 - ], - "samples_ts": [ - 114.417, - 114.408, - 114.31 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:08:21Z", - "avg_ns": 4959751842, - "stddev_ns": 28627467, - "avg_ts": 25.808316, - "stddev_ts": 0.149088, - "samples_ns": [ - 4929853106, - 4962491743, - 4986910679 - ], - "samples_ts": [ - 25.9643, - 25.7935, - 25.6672 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 6 - }, - { - "timestamp_utc": "2025-12-08T20:09:57.999516+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:08:37Z\",\n \"avg_ns\": 4479210778,\n \"stddev_ns\": 7260134,\n \"avg_ts\": 114.306051,\n \"stddev_ts\": 0.185186,\n \"samples_ns\": [ 4486988522, 4478029420, 4472614394 ],\n \"samples_ts\": [ 114.108, 114.336, 114.474 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:08:55Z\",\n \"avg_ns\": 20918691635,\n \"stddev_ns\": 251863715,\n \"avg_ts\": 24.478068,\n \"stddev_ts\": 0.292705,\n \"samples_ns\": [ 21209201523, 20761675114, 20785198268 ],\n \"samples_ts\": [ 24.1405, 24.6608, 24.6329 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:08:37Z", - "avg_ns": 4479210778, - "stddev_ns": 7260134, - "avg_ts": 114.306051, - "stddev_ts": 0.185186, - "samples_ns": [ - 4486988522, - 4478029420, - 4472614394 - ], - "samples_ts": [ - 114.108, - 114.336, - 114.474 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:08:55Z", - "avg_ns": 20918691635, - "stddev_ns": 251863715, - "avg_ts": 24.478068, - "stddev_ts": 0.292705, - "samples_ns": [ - 21209201523, - 20761675114, - 20785198268 - ], - "samples_ts": [ - 24.1405, - 24.6608, - 24.6329 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 7 - }, - { - "timestamp_utc": "2025-12-08T20:10:19.213721+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:09:58Z\",\n \"avg_ns\": 1105184118,\n \"stddev_ns\": 3629888,\n \"avg_ts\": 115.818638,\n \"stddev_ts\": 0.379925,\n \"samples_ns\": [ 1102125251, 1109194980, 1104232124 ],\n \"samples_ts\": [ 116.139, 115.399, 115.918 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:10:02Z\",\n \"avg_ns\": 5374942504,\n \"stddev_ns\": 345628570,\n \"avg_ts\": 23.882570,\n \"stddev_ts\": 1.594676,\n \"samples_ns\": [ 5585886941, 4976066235, 5562874337 ],\n \"samples_ts\": [ 22.9149, 25.7231, 23.0097 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:09:58Z", - "avg_ns": 1105184118, - "stddev_ns": 3629888, - "avg_ts": 115.818638, - "stddev_ts": 0.379925, - "samples_ns": [ - 1102125251, - 1109194980, - 1104232124 - ], - "samples_ts": [ - 116.139, - 115.399, - 115.918 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:10:02Z", - "avg_ns": 5374942504, - "stddev_ns": 345628570, - "avg_ts": 23.88257, - "stddev_ts": 1.594676, - "samples_ns": [ - 5585886941, - 4976066235, - 5562874337 - ], - "samples_ts": [ - 22.9149, - 25.7231, - 23.0097 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 8 - }, - { - "timestamp_utc": "2025-12-08T20:11:26.215949+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:10:19Z\",\n \"avg_ns\": 1102747557,\n \"stddev_ns\": 2343874,\n \"avg_ts\": 116.074060,\n \"stddev_ts\": 0.246939,\n \"samples_ns\": [ 1103746033, 1104426353, 1100070287 ],\n \"samples_ts\": [ 115.969, 115.897, 116.356 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:10:24Z\",\n \"avg_ns\": 20639936662,\n \"stddev_ns\": 351422511,\n \"avg_ts\": 24.811120,\n \"stddev_ts\": 0.426624,\n \"samples_ns\": [ 20833807997, 20234280749, 20851721241 ],\n \"samples_ts\": [ 24.5754, 25.3036, 24.5543 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:10:19Z", - "avg_ns": 1102747557, - "stddev_ns": 2343874, - "avg_ts": 116.07406, - "stddev_ts": 0.246939, - "samples_ns": [ - 1103746033, - 1104426353, - 1100070287 - ], - "samples_ts": [ - 115.969, - 115.897, - 116.356 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:10:24Z", - "avg_ns": 20639936662, - "stddev_ns": 351422511, - "avg_ts": 24.81112, - "stddev_ts": 0.426624, - "samples_ns": [ - 20833807997, - 20234280749, - 20851721241 - ], - "samples_ts": [ - 24.5754, - 25.3036, - 24.5543 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 9 - }, - { - "timestamp_utc": "2025-12-08T20:12:01.164077+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:11:26Z\",\n \"avg_ns\": 4690328629,\n \"stddev_ns\": 2167827,\n \"avg_ts\": 109.160810,\n \"stddev_ts\": 0.050392,\n \"samples_ns\": [ 4689609708, 4692762327, 4688613854 ],\n \"samples_ts\": [ 109.178, 109.104, 109.201 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:11:45Z\",\n \"avg_ns\": 5160083525,\n \"stddev_ns\": 364142446,\n \"avg_ts\": 24.885285,\n \"stddev_ts\": 1.689607,\n \"samples_ns\": [ 5579161187, 4920877437, 4980211953 ],\n \"samples_ts\": [ 22.9425, 26.0116, 25.7017 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:11:26Z", - "avg_ns": 4690328629, - "stddev_ns": 2167827, - "avg_ts": 109.16081, - "stddev_ts": 0.050392, - "samples_ns": [ - 4689609708, - 4692762327, - 4688613854 - ], - "samples_ts": [ - 109.178, - 109.104, - 109.201 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:11:45Z", - "avg_ns": 5160083525, - "stddev_ns": 364142446, - "avg_ts": 24.885285, - "stddev_ts": 1.689607, - "samples_ns": [ - 5579161187, - 4920877437, - 4980211953 - ], - "samples_ts": [ - 22.9425, - 26.0116, - 25.7017 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 10 - }, - { - "timestamp_utc": "2025-12-08T20:13:22.355728+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:12:01Z\",\n \"avg_ns\": 4794397539,\n \"stddev_ns\": 8287062,\n \"avg_ts\": 106.791524,\n \"stddev_ts\": 0.184739,\n \"samples_ns\": [ 4784947905, 4800423939, 4797820775 ],\n \"samples_ts\": [ 107.002, 106.657, 106.715 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:12:20Z\",\n \"avg_ns\": 20425213054,\n \"stddev_ns\": 349850401,\n \"avg_ts\": 25.071914,\n \"stddev_ts\": 0.425241,\n \"samples_ns\": [ 20829123924, 20229364171, 20217151068 ],\n \"samples_ts\": [ 24.581, 25.3097, 25.325 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:12:01Z", - "avg_ns": 4794397539, - "stddev_ns": 8287062, - "avg_ts": 106.791524, - "stddev_ts": 0.184739, - "samples_ns": [ - 4784947905, - 4800423939, - 4797820775 - ], - "samples_ts": [ - 107.002, - 106.657, - 106.715 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:12:20Z", - "avg_ns": 20425213054, - "stddev_ns": 349850401, - "avg_ts": 25.071914, - "stddev_ts": 0.425241, - "samples_ns": [ - 20829123924, - 20229364171, - 20217151068 - ], - "samples_ts": [ - 24.581, - 25.3097, - 25.325 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 11 - }, - { - "timestamp_utc": "2025-12-08T20:13:42.876553+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:13:22Z\",\n \"avg_ns\": 1292471578,\n \"stddev_ns\": 315915571,\n \"avg_ts\": 102.616856,\n \"stddev_ts\": 21.980772,\n \"samples_ns\": [ 1108853253, 1111304768, 1657256713 ],\n \"samples_ts\": [ 115.435, 115.18, 77.2361 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:13:27Z\",\n \"avg_ns\": 4952994205,\n \"stddev_ns\": 30181803,\n \"avg_ts\": 25.843592,\n \"stddev_ts\": 0.157039,\n \"samples_ns\": [ 4942332879, 4929589988, 4987059748 ],\n \"samples_ts\": [ 25.8987, 25.9656, 25.6664 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:13:22Z", - "avg_ns": 1292471578, - "stddev_ns": 315915571, - "avg_ts": 102.616856, - "stddev_ts": 21.980772, - "samples_ns": [ - 1108853253, - 1111304768, - 1657256713 - ], - "samples_ts": [ - 115.435, - 115.18, - 77.2361 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:13:27Z", - "avg_ns": 4952994205, - "stddev_ns": 30181803, - "avg_ts": 25.843592, - "stddev_ts": 0.157039, - "samples_ns": [ - 4942332879, - 4929589988, - 4987059748 - ], - "samples_ts": [ - 25.8987, - 25.9656, - 25.6664 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 12 - }, - { - "timestamp_utc": "2025-12-08T20:14:56.653716+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:13:43Z\",\n \"avg_ns\": 1102966892,\n \"stddev_ns\": 652592,\n \"avg_ts\": 116.050654,\n \"stddev_ts\": 0.068505,\n \"samples_ns\": [ 1103461701, 1103209432, 1102229545 ],\n \"samples_ts\": [ 115.999, 116.025, 116.128 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:13:47Z\",\n \"avg_ns\": 22896565701,\n \"stddev_ns\": 650794940,\n \"avg_ts\": 22.373291,\n \"stddev_ts\": 0.625852,\n \"samples_ns\": [ 22479687094, 22563531850, 23646478161 ],\n \"samples_ts\": [ 22.7761, 22.6915, 21.6523 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:13:43Z", - "avg_ns": 1102966892, - "stddev_ns": 652592, - "avg_ts": 116.050654, - "stddev_ts": 0.068505, - "samples_ns": [ - 1103461701, - 1103209432, - 1102229545 - ], - "samples_ts": [ - 115.999, - 116.025, - 116.128 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:13:47Z", - "avg_ns": 22896565701, - "stddev_ns": 650794940, - "avg_ts": 22.373291, - "stddev_ts": 0.625852, - "samples_ns": [ - 22479687094, - 22563531850, - 23646478161 - ], - "samples_ts": [ - 22.7761, - 22.6915, - 21.6523 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 13 - }, - { - "timestamp_utc": "2025-12-08T20:15:30.117658+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:14:57Z\",\n \"avg_ns\": 4459336183,\n \"stddev_ns\": 8598832,\n \"avg_ts\": 114.815580,\n \"stddev_ts\": 0.221483,\n \"samples_ns\": [ 4460621452, 4450167323, 4467219775 ],\n \"samples_ts\": [ 114.782, 115.052, 114.613 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:15:15Z\",\n \"avg_ns\": 4977866472,\n \"stddev_ns\": 31717602,\n \"avg_ts\": 25.714525,\n \"stddev_ts\": 0.164279,\n \"samples_ns\": [ 5003674782, 4987466865, 4942457769 ],\n \"samples_ts\": [ 25.5812, 25.6643, 25.898 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:14:57Z", - "avg_ns": 4459336183, - "stddev_ns": 8598832, - "avg_ts": 114.81558, - "stddev_ts": 0.221483, - "samples_ns": [ - 4460621452, - 4450167323, - 4467219775 - ], - "samples_ts": [ - 114.782, - 115.052, - 114.613 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:15:15Z", - "avg_ns": 4977866472, - "stddev_ns": 31717602, - "avg_ts": 25.714525, - "stddev_ts": 0.164279, - "samples_ns": [ - 5003674782, - 4987466865, - 4942457769 - ], - "samples_ts": [ - 25.5812, - 25.6643, - 25.898 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 14 - }, - { - "timestamp_utc": "2025-12-08T20:16:49.919883+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:15:30Z\",\n \"avg_ns\": 4460933992,\n \"stddev_ns\": 6750753,\n \"avg_ts\": 114.774346,\n \"stddev_ts\": 0.173832,\n \"samples_ns\": [ 4453141155, 4464682182, 4464978640 ],\n \"samples_ts\": [ 114.975, 114.678, 114.67 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:15:48Z\",\n \"avg_ns\": 20434866792,\n \"stddev_ns\": 79067402,\n \"avg_ts\": 25.055465,\n \"stddev_ts\": 0.096734,\n \"samples_ns\": [ 20525935322, 20383715915, 20394949139 ],\n \"samples_ts\": [ 24.9441, 25.1181, 25.1043 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:15:30Z", - "avg_ns": 4460933992, - "stddev_ns": 6750753, - "avg_ts": 114.774346, - "stddev_ts": 0.173832, - "samples_ns": [ - 4453141155, - 4464682182, - 4464978640 - ], - "samples_ts": [ - 114.975, - 114.678, - 114.67 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:15:48Z", - "avg_ns": 20434866792, - "stddev_ns": 79067402, - "avg_ts": 25.055465, - "stddev_ts": 0.096734, - "samples_ns": [ - 20525935322, - 20383715915, - 20394949139 - ], - "samples_ts": [ - 24.9441, - 25.1181, - 25.1043 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 15 - }, - { - "timestamp_utc": "2025-12-08T20:17:10.585724+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:16:50Z\",\n \"avg_ns\": 1104539903,\n \"stddev_ns\": 1489854,\n \"avg_ts\": 115.885496,\n \"stddev_ts\": 0.156253,\n \"samples_ns\": [ 1105981586, 1104630617, 1103007508 ],\n \"samples_ts\": [ 115.734, 115.876, 116.046 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:16:54Z\",\n \"avg_ns\": 5179965667,\n \"stddev_ns\": 338399634,\n \"avg_ts\": 24.778558,\n \"stddev_ts\": 1.560667,\n \"samples_ns\": [ 5570198741, 5002253541, 4967444721 ],\n \"samples_ts\": [ 22.9794, 25.5885, 25.7678 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:16:50Z", - "avg_ns": 1104539903, - "stddev_ns": 1489854, - "avg_ts": 115.885496, - "stddev_ts": 0.156253, - "samples_ns": [ - 1105981586, - 1104630617, - 1103007508 - ], - "samples_ts": [ - 115.734, - 115.876, - 116.046 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:16:54Z", - "avg_ns": 5179965667, - "stddev_ns": 338399634, - "avg_ts": 24.778558, - "stddev_ts": 1.560667, - "samples_ns": [ - 5570198741, - 5002253541, - 4967444721 - ], - "samples_ts": [ - 22.9794, - 25.5885, - 25.7678 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 16 - }, - { - "timestamp_utc": "2025-12-08T20:18:22.169253+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:17:11Z\",\n \"avg_ns\": 1101695893,\n \"stddev_ns\": 2639438,\n \"avg_ts\": 116.184957,\n \"stddev_ts\": 0.278718,\n \"samples_ns\": [ 1098668859, 1102902108, 1103516712 ],\n \"samples_ts\": [ 116.505, 116.057, 115.993 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:17:15Z\",\n \"avg_ns\": 22166510127,\n \"stddev_ns\": 800992569,\n \"avg_ts\": 23.118242,\n \"stddev_ts\": 0.844248,\n \"samples_ns\": [ 21299876415, 22879645819, 22320008148 ],\n \"samples_ts\": [ 24.0377, 22.378, 22.9391 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:17:11Z", - "avg_ns": 1101695893, - "stddev_ns": 2639438, - "avg_ts": 116.184957, - "stddev_ts": 0.278718, - "samples_ns": [ - 1098668859, - 1102902108, - 1103516712 - ], - "samples_ts": [ - 116.505, - 116.057, - 115.993 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:17:15Z", - "avg_ns": 22166510127, - "stddev_ns": 800992569, - "avg_ts": 23.118242, - "stddev_ts": 0.844248, - "samples_ns": [ - 21299876415, - 22879645819, - 22320008148 - ], - "samples_ts": [ - 24.0377, - 22.378, - 22.9391 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 17 - }, - { - "timestamp_utc": "2025-12-08T20:18:55.933715+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:18:22Z\",\n \"avg_ns\": 4579015554,\n \"stddev_ns\": 4946767,\n \"avg_ts\": 111.814514,\n \"stddev_ts\": 0.120843,\n \"samples_ns\": [ 4581273232, 4573343752, 4582429680 ],\n \"samples_ts\": [ 111.759, 111.953, 111.731 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:18:41Z\",\n \"avg_ns\": 4928851233,\n \"stddev_ns\": 18073776,\n \"avg_ts\": 25.969772,\n \"stddev_ts\": 0.095037,\n \"samples_ns\": [ 4949604447, 4916569464, 4920379790 ],\n \"samples_ts\": [ 25.8607, 26.0344, 26.0143 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:18:22Z", - "avg_ns": 4579015554, - "stddev_ns": 4946767, - "avg_ts": 111.814514, - "stddev_ts": 0.120843, - "samples_ns": [ - 4581273232, - 4573343752, - 4582429680 - ], - "samples_ts": [ - 111.759, - 111.953, - 111.731 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:18:41Z", - "avg_ns": 4928851233, - "stddev_ns": 18073776, - "avg_ts": 25.969772, - "stddev_ts": 0.095037, - "samples_ns": [ - 4949604447, - 4916569464, - 4920379790 - ], - "samples_ts": [ - 25.8607, - 26.0344, - 26.0143 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 18 - }, - { - "timestamp_utc": "2025-12-08T20:20:15.249296+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:18:56Z\",\n \"avg_ns\": 4472409237,\n \"stddev_ns\": 17716890,\n \"avg_ts\": 114.480883,\n \"stddev_ts\": 0.453579,\n \"samples_ns\": [ 4489855648, 4472937862, 4454434203 ],\n \"samples_ts\": [ 114.035, 114.466, 114.942 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:19:14Z\",\n \"avg_ns\": 20249860285,\n \"stddev_ns\": 39821615,\n \"avg_ts\": 25.284190,\n \"stddev_ts\": 0.049701,\n \"samples_ns\": [ 20213261250, 20292266731, 20244052874 ],\n \"samples_ts\": [ 25.3299, 25.2313, 25.2914 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:18:56Z", - "avg_ns": 4472409237, - "stddev_ns": 17716890, - "avg_ts": 114.480883, - "stddev_ts": 0.453579, - "samples_ns": [ - 4489855648, - 4472937862, - 4454434203 - ], - "samples_ts": [ - 114.035, - 114.466, - 114.942 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:19:14Z", - "avg_ns": 20249860285, - "stddev_ns": 39821615, - "avg_ts": 25.28419, - "stddev_ts": 0.049701, - "samples_ns": [ - 20213261250, - 20292266731, - 20244052874 - ], - "samples_ts": [ - 25.3299, - 25.2313, - 25.2914 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 19 - }, - { - "timestamp_utc": "2025-12-08T20:20:35.773016+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:20:15Z\",\n \"avg_ns\": 1102130631,\n \"stddev_ns\": 6152721,\n \"avg_ts\": 116.141092,\n \"stddev_ts\": 0.647159,\n \"samples_ns\": [ 1108882015, 1096839192, 1100670686 ],\n \"samples_ts\": [ 115.432, 116.699, 116.293 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:20:20Z\",\n \"avg_ns\": 5149274508,\n \"stddev_ns\": 337030802,\n \"avg_ts\": 24.926469,\n \"stddev_ts\": 1.572102,\n \"samples_ns\": [ 4951950112, 5538431222, 4957442192 ],\n \"samples_ts\": [ 25.8484, 23.1112, 25.8198 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:20:15Z", - "avg_ns": 1102130631, - "stddev_ns": 6152721, - "avg_ts": 116.141092, - "stddev_ts": 0.647159, - "samples_ns": [ - 1108882015, - 1096839192, - 1100670686 - ], - "samples_ts": [ - 115.432, - 116.699, - 116.293 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:20:20Z", - "avg_ns": 5149274508, - "stddev_ns": 337030802, - "avg_ts": 24.926469, - "stddev_ts": 1.572102, - "samples_ns": [ - 4951950112, - 5538431222, - 4957442192 - ], - "samples_ts": [ - 25.8484, - 23.1112, - 25.8198 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 20 - }, - { - "timestamp_utc": "2025-12-08T20:21:42.168719+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:20:36Z\",\n \"avg_ns\": 1119817838,\n \"stddev_ns\": 2037030,\n \"avg_ts\": 114.304557,\n \"stddev_ts\": 0.207781,\n \"samples_ns\": [ 1119478379, 1117972419, 1122002718 ],\n \"samples_ts\": [ 114.339, 114.493, 114.082 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:20:40Z\",\n \"avg_ns\": 20414553882,\n \"stddev_ns\": 340529390,\n \"avg_ts\": 25.084757,\n \"stddev_ts\": 0.414648,\n \"samples_ns\": [ 20182763018, 20805522017, 20255376612 ],\n \"samples_ts\": [ 25.3682, 24.6089, 25.2772 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:20:36Z", - "avg_ns": 1119817838, - "stddev_ns": 2037030, - "avg_ts": 114.304557, - "stddev_ts": 0.207781, - "samples_ns": [ - 1119478379, - 1117972419, - 1122002718 - ], - "samples_ts": [ - 114.339, - 114.493, - 114.082 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:20:40Z", - "avg_ns": 20414553882, - "stddev_ns": 340529390, - "avg_ts": 25.084757, - "stddev_ts": 0.414648, - "samples_ns": [ - 20182763018, - 20805522017, - 20255376612 - ], - "samples_ts": [ - 25.3682, - 24.6089, - 25.2772 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 21 - }, - { - "timestamp_utc": "2025-12-08T20:22:17.042348+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:21:42Z\",\n \"avg_ns\": 4881319842,\n \"stddev_ns\": 316997634,\n \"avg_ts\": 105.174697,\n \"stddev_ts\": 6.583672,\n \"samples_ns\": [ 4692844153, 5247302377, 4703812998 ],\n \"samples_ts\": [ 109.102, 97.5739, 108.848 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:22:02Z\",\n \"avg_ns\": 4944474523,\n \"stddev_ns\": 48501185,\n \"avg_ts\": 25.889137,\n \"stddev_ts\": 0.252907,\n \"samples_ns\": [ 4998744366, 4905362822, 4929316383 ],\n \"samples_ts\": [ 25.6064, 26.0939, 25.9671 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:21:42Z", - "avg_ns": 4881319842, - "stddev_ns": 316997634, - "avg_ts": 105.174697, - "stddev_ts": 6.583672, - "samples_ns": [ - 4692844153, - 5247302377, - 4703812998 - ], - "samples_ts": [ - 109.102, - 97.5739, - 108.848 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:22:02Z", - "avg_ns": 4944474523, - "stddev_ns": 48501185, - "avg_ts": 25.889137, - "stddev_ts": 0.252907, - "samples_ns": [ - 4998744366, - 4905362822, - 4929316383 - ], - "samples_ts": [ - 25.6064, - 26.0939, - 25.9671 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 22 - }, - { - "timestamp_utc": "2025-12-08T20:23:43.035242+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:22:17Z\",\n \"avg_ns\": 4706107607,\n \"stddev_ns\": 6394865,\n \"avg_ts\": 108.794926,\n \"stddev_ts\": 0.147757,\n \"samples_ns\": [ 4713208483, 4704311444, 4700802894 ],\n \"samples_ts\": [ 108.631, 108.836, 108.918 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:22:36Z\",\n \"avg_ns\": 22166222320,\n \"stddev_ns\": 165358774,\n \"avg_ts\": 23.099068,\n \"stddev_ts\": 0.173008,\n \"samples_ns\": [ 21976892252, 22282313879, 22239460830 ],\n \"samples_ts\": [ 23.2972, 22.9779, 23.0221 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:22:17Z", - "avg_ns": 4706107607, - "stddev_ns": 6394865, - "avg_ts": 108.794926, - "stddev_ts": 0.147757, - "samples_ns": [ - 4713208483, - 4704311444, - 4700802894 - ], - "samples_ts": [ - 108.631, - 108.836, - 108.918 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:22:36Z", - "avg_ns": 22166222320, - "stddev_ns": 165358774, - "avg_ts": 23.099068, - "stddev_ts": 0.173008, - "samples_ns": [ - 21976892252, - 22282313879, - 22239460830 - ], - "samples_ts": [ - 23.2972, - 22.9779, - 23.0221 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 23 - }, - { - "timestamp_utc": "2025-12-08T20:24:03.635119+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:23:43Z\",\n \"avg_ns\": 1101291814,\n \"stddev_ns\": 1321623,\n \"avg_ts\": 116.227253,\n \"stddev_ts\": 0.139473,\n \"samples_ns\": [ 1102505449, 1099884432, 1101485562 ],\n \"samples_ts\": [ 116.099, 116.376, 116.207 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:23:48Z\",\n \"avg_ns\": 5166143637,\n \"stddev_ns\": 330263379,\n \"avg_ts\": 24.841999,\n \"stddev_ts\": 1.532142,\n \"samples_ns\": [ 4960702985, 5547107568, 4990620358 ],\n \"samples_ts\": [ 25.8028, 23.0751, 25.6481 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:23:43Z", - "avg_ns": 1101291814, - "stddev_ns": 1321623, - "avg_ts": 116.227253, - "stddev_ts": 0.139473, - "samples_ns": [ - 1102505449, - 1099884432, - 1101485562 - ], - "samples_ts": [ - 116.099, - 116.376, - 116.207 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:23:48Z", - "avg_ns": 5166143637, - "stddev_ns": 330263379, - "avg_ts": 24.841999, - "stddev_ts": 1.532142, - "samples_ns": [ - 4960702985, - 5547107568, - 4990620358 - ], - "samples_ts": [ - 25.8028, - 23.0751, - 25.6481 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 24 - }, - { - "timestamp_utc": "2025-12-08T20:25:12.311068+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:24:04Z\",\n \"avg_ns\": 1099775269,\n \"stddev_ns\": 1011720,\n \"avg_ts\": 116.387480,\n \"stddev_ts\": 0.107068,\n \"samples_ns\": [ 1100367871, 1100350230, 1098607707 ],\n \"samples_ts\": [ 116.325, 116.327, 116.511 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:24:08Z\",\n \"avg_ns\": 21186654955,\n \"stddev_ns\": 855002405,\n \"avg_ts\": 24.192175,\n \"stddev_ts\": 0.967343,\n \"samples_ns\": [ 22104646445, 21042298354, 20413020068 ],\n \"samples_ts\": [ 23.1626, 24.3319, 25.082 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:24:04Z", - "avg_ns": 1099775269, - "stddev_ns": 1011720, - "avg_ts": 116.38748, - "stddev_ts": 0.107068, - "samples_ns": [ - 1100367871, - 1100350230, - 1098607707 - ], - "samples_ts": [ - 116.325, - 116.327, - 116.511 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:24:08Z", - "avg_ns": 21186654955, - "stddev_ns": 855002405, - "avg_ts": 24.192175, - "stddev_ts": 0.967343, - "samples_ns": [ - 22104646445, - 21042298354, - 20413020068 - ], - "samples_ts": [ - 23.1626, - 24.3319, - 25.082 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 25 - }, - { - "timestamp_utc": "2025-12-08T20:25:45.681680+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:25:12Z\",\n \"avg_ns\": 4448221089,\n \"stddev_ns\": 4024378,\n \"avg_ts\": 115.102255,\n \"stddev_ts\": 0.104176,\n \"samples_ns\": [ 4443704110, 4449534435, 4451424722 ],\n \"samples_ts\": [ 115.219, 115.068, 115.019 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:25:30Z\",\n \"avg_ns\": 4971029571,\n \"stddev_ns\": 14029490,\n \"avg_ts\": 25.749330,\n \"stddev_ts\": 0.072665,\n \"samples_ns\": [ 4985170833, 4970802928, 4957114953 ],\n \"samples_ts\": [ 25.6762, 25.7504, 25.8215 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:25:12Z", - "avg_ns": 4448221089, - "stddev_ns": 4024378, - "avg_ts": 115.102255, - "stddev_ts": 0.104176, - "samples_ns": [ - 4443704110, - 4449534435, - 4451424722 - ], - "samples_ts": [ - 115.219, - 115.068, - 115.019 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:25:30Z", - "avg_ns": 4971029571, - "stddev_ns": 14029490, - "avg_ts": 25.74933, - "stddev_ts": 0.072665, - "samples_ns": [ - 4985170833, - 4970802928, - 4957114953 - ], - "samples_ts": [ - 25.6762, - 25.7504, - 25.8215 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 26 - }, - { - "timestamp_utc": "2025-12-08T20:27:10.946572+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:25:46Z\",\n \"avg_ns\": 4636817119,\n \"stddev_ns\": 337230205,\n \"avg_ts\": 110.795533,\n \"stddev_ts\": 7.733409,\n \"samples_ns\": [ 4444837259, 4439409697, 5026204402 ],\n \"samples_ts\": [ 115.19, 115.331, 101.866 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:26:04Z\",\n \"avg_ns\": 22064628595,\n \"stddev_ns\": 150235310,\n \"avg_ts\": 23.205280,\n \"stddev_ts\": 0.158570,\n \"samples_ns\": [ 21892913438, 22171842137, 22129130210 ],\n \"samples_ts\": [ 23.3866, 23.0924, 23.1369 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:25:46Z", - "avg_ns": 4636817119, - "stddev_ns": 337230205, - "avg_ts": 110.795533, - "stddev_ts": 7.733409, - "samples_ns": [ - 4444837259, - 4439409697, - 5026204402 - ], - "samples_ts": [ - 115.19, - 115.331, - 101.866 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:26:04Z", - "avg_ns": 22064628595, - "stddev_ns": 150235310, - "avg_ts": 23.20528, - "stddev_ts": 0.15857, - "samples_ns": [ - 21892913438, - 22171842137, - 22129130210 - ], - "samples_ts": [ - 23.3866, - 23.0924, - 23.1369 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 27 - }, - { - "timestamp_utc": "2025-12-08T20:27:30.854812+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:27:11Z\",\n \"avg_ns\": 1104636585,\n \"stddev_ns\": 4518513,\n \"avg_ts\": 115.876508,\n \"stddev_ts\": 0.474885,\n \"samples_ns\": [ 1108134796, 1099535205, 1106239755 ],\n \"samples_ts\": [ 115.509, 116.413, 115.707 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:27:15Z\",\n \"avg_ns\": 4935855754,\n \"stddev_ns\": 20754296,\n \"avg_ts\": 25.932992,\n \"stddev_ts\": 0.109085,\n \"samples_ns\": [ 4955944837, 4937127243, 4914495184 ],\n \"samples_ts\": [ 25.8276, 25.926, 26.0454 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:27:11Z", - "avg_ns": 1104636585, - "stddev_ns": 4518513, - "avg_ts": 115.876508, - "stddev_ts": 0.474885, - "samples_ns": [ - 1108134796, - 1099535205, - 1106239755 - ], - "samples_ts": [ - 115.509, - 116.413, - 115.707 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:27:15Z", - "avg_ns": 4935855754, - "stddev_ns": 20754296, - "avg_ts": 25.932992, - "stddev_ts": 0.109085, - "samples_ns": [ - 4955944837, - 4937127243, - 4914495184 - ], - "samples_ts": [ - 25.8276, - 25.926, - 26.0454 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 28 - }, - { - "timestamp_utc": "2025-12-08T20:28:41.790455+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:27:31Z\",\n \"avg_ns\": 1106684147,\n \"stddev_ns\": 808378,\n \"avg_ts\": 115.660865,\n \"stddev_ts\": 0.084504,\n \"samples_ns\": [ 1106866796, 1107385574, 1105800071 ],\n \"samples_ts\": [ 115.642, 115.588, 115.753 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:27:35Z\",\n \"avg_ns\": 21939110123,\n \"stddev_ns\": 594930932,\n \"avg_ts\": 23.348947,\n \"stddev_ts\": 0.643232,\n \"samples_ns\": [ 21252145757, 22284247415, 22280937199 ],\n \"samples_ts\": [ 24.0917, 22.9759, 22.9793 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:27:31Z", - "avg_ns": 1106684147, - "stddev_ns": 808378, - "avg_ts": 115.660865, - "stddev_ts": 0.084504, - "samples_ns": [ - 1106866796, - 1107385574, - 1105800071 - ], - "samples_ts": [ - 115.642, - 115.588, - 115.753 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:27:35Z", - "avg_ns": 21939110123, - "stddev_ns": 594930932, - "avg_ts": 23.348947, - "stddev_ts": 0.643232, - "samples_ns": [ - 21252145757, - 22284247415, - 22280937199 - ], - "samples_ts": [ - 24.0917, - 22.9759, - 22.9793 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 29 - }, - { - "timestamp_utc": "2025-12-08T20:29:15.302281+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:28:42Z\",\n \"avg_ns\": 4484191379,\n \"stddev_ns\": 4902134,\n \"avg_ts\": 114.178982,\n \"stddev_ts\": 0.124838,\n \"samples_ns\": [ 4488697385, 4478972172, 4484904581 ],\n \"samples_ts\": [ 114.064, 114.312, 114.161 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:29:00Z\",\n \"avg_ns\": 4970309734,\n \"stddev_ns\": 21968630,\n \"avg_ts\": 25.753258,\n \"stddev_ts\": 0.114106,\n \"samples_ns\": [ 4985134354, 4945070934, 4980723916 ],\n \"samples_ts\": [ 25.6763, 25.8844, 25.6991 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:28:42Z", - "avg_ns": 4484191379, - "stddev_ns": 4902134, - "avg_ts": 114.178982, - "stddev_ts": 0.124838, - "samples_ns": [ - 4488697385, - 4478972172, - 4484904581 - ], - "samples_ts": [ - 114.064, - 114.312, - 114.161 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:29:00Z", - "avg_ns": 4970309734, - "stddev_ns": 21968630, - "avg_ts": 25.753258, - "stddev_ts": 0.114106, - "samples_ns": [ - 4985134354, - 4945070934, - 4980723916 - ], - "samples_ts": [ - 25.6763, - 25.8844, - 25.6991 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 30 - }, - { - "timestamp_utc": "2025-12-08T20:30:34.844130+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:29:15Z\",\n \"avg_ns\": 4480067361,\n \"stddev_ns\": 7656111,\n \"avg_ts\": 114.284219,\n \"stddev_ts\": 0.195465,\n \"samples_ns\": [ 4471350627, 4485700668, 4483150789 ],\n \"samples_ts\": [ 114.507, 114.14, 114.205 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:29:33Z\",\n \"avg_ns\": 20304342910,\n \"stddev_ns\": 101369383,\n \"avg_ts\": 25.216698,\n \"stddev_ts\": 0.125532,\n \"samples_ns\": [ 20245570349, 20421393844, 20246064537 ],\n \"samples_ts\": [ 25.2895, 25.0717, 25.2889 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:29:15Z", - "avg_ns": 4480067361, - "stddev_ns": 7656111, - "avg_ts": 114.284219, - "stddev_ts": 0.195465, - "samples_ns": [ - 4471350627, - 4485700668, - 4483150789 - ], - "samples_ts": [ - 114.507, - 114.14, - 114.205 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:29:33Z", - "avg_ns": 20304342910, - "stddev_ns": 101369383, - "avg_ts": 25.216698, - "stddev_ts": 0.125532, - "samples_ns": [ - 20245570349, - 20421393844, - 20246064537 - ], - "samples_ts": [ - 25.2895, - 25.0717, - 25.2889 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 31 - }, - { - "timestamp_utc": "2025-12-08T20:30:54.777050+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:30:35Z\",\n \"avg_ns\": 1110667718,\n \"stddev_ns\": 1080090,\n \"avg_ts\": 115.246062,\n \"stddev_ts\": 0.111986,\n \"samples_ns\": [ 1110429757, 1109726975, 1111846423 ],\n \"samples_ts\": [ 115.271, 115.344, 115.124 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:30:39Z\",\n \"avg_ns\": 4939995584,\n \"stddev_ns\": 12942653,\n \"avg_ts\": 25.911073,\n \"stddev_ts\": 0.067849,\n \"samples_ns\": [ 4928023470, 4953727851, 4938235433 ],\n \"samples_ts\": [ 25.9739, 25.8391, 25.9202 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:30:35Z", - "avg_ns": 1110667718, - "stddev_ns": 1080090, - "avg_ts": 115.246062, - "stddev_ts": 0.111986, - "samples_ns": [ - 1110429757, - 1109726975, - 1111846423 - ], - "samples_ts": [ - 115.271, - 115.344, - 115.124 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:30:39Z", - "avg_ns": 4939995584, - "stddev_ns": 12942653, - "avg_ts": 25.911073, - "stddev_ts": 0.067849, - "samples_ns": [ - 4928023470, - 4953727851, - 4938235433 - ], - "samples_ts": [ - 25.9739, - 25.8391, - 25.9202 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 32 - }, - { - "timestamp_utc": "2025-12-08T20:32:02.273403+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:30:55Z\",\n \"avg_ns\": 1101878650,\n \"stddev_ns\": 4018236,\n \"avg_ts\": 116.166270,\n \"stddev_ts\": 0.422791,\n \"samples_ns\": [ 1106486142, 1099101105, 1100048703 ],\n \"samples_ts\": [ 115.682, 116.459, 116.358 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:30:59Z\",\n \"avg_ns\": 20793183568,\n \"stddev_ns\": 1018223666,\n \"avg_ts\": 24.661797,\n \"stddev_ts\": 1.174501,\n \"samples_ns\": [ 21968779948, 20189283911, 20221486847 ],\n \"samples_ts\": [ 23.3058, 25.36, 25.3196 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:30:55Z", - "avg_ns": 1101878650, - "stddev_ns": 4018236, - "avg_ts": 116.16627, - "stddev_ts": 0.422791, - "samples_ns": [ - 1106486142, - 1099101105, - 1100048703 - ], - "samples_ts": [ - 115.682, - 116.459, - 116.358 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:30:59Z", - "avg_ns": 20793183568, - "stddev_ns": 1018223666, - "avg_ts": 24.661797, - "stddev_ts": 1.174501, - "samples_ns": [ - 21968779948, - 20189283911, - 20221486847 - ], - "samples_ts": [ - 23.3058, - 25.36, - 25.3196 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 33 - }, - { - "timestamp_utc": "2025-12-08T20:32:36.779186+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:32:02Z\",\n \"avg_ns\": 4708705236,\n \"stddev_ns\": 7757190,\n \"avg_ts\": 108.734971,\n \"stddev_ts\": 0.179234,\n \"samples_ns\": [ 4715226698, 4710761666, 4700127345 ],\n \"samples_ts\": [ 108.584, 108.687, 108.933 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:32:21Z\",\n \"avg_ns\": 4983226252,\n \"stddev_ns\": 21911866,\n \"avg_ts\": 25.686501,\n \"stddev_ts\": 0.112725,\n \"samples_ns\": [ 5007873897, 4975851683, 4965953177 ],\n \"samples_ts\": [ 25.5597, 25.7242, 25.7755 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:32:02Z", - "avg_ns": 4708705236, - "stddev_ns": 7757190, - "avg_ts": 108.734971, - "stddev_ts": 0.179234, - "samples_ns": [ - 4715226698, - 4710761666, - 4700127345 - ], - "samples_ts": [ - 108.584, - 108.687, - 108.933 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:32:21Z", - "avg_ns": 4983226252, - "stddev_ns": 21911866, - "avg_ts": 25.686501, - "stddev_ts": 0.112725, - "samples_ns": [ - 5007873897, - 4975851683, - 4965953177 - ], - "samples_ts": [ - 25.5597, - 25.7242, - 25.7755 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 34 - }, - { - "timestamp_utc": "2025-12-08T20:33:59.591441+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:32:37Z\",\n \"avg_ns\": 4710023117,\n \"stddev_ns\": 1688910,\n \"avg_ts\": 108.704359,\n \"stddev_ts\": 0.038955,\n \"samples_ns\": [ 4711144020, 4708082307, 4710843025 ],\n \"samples_ts\": [ 108.678, 108.749, 108.685 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:32:56Z\",\n \"avg_ns\": 20893355729,\n \"stddev_ns\": 182501047,\n \"avg_ts\": 24.506638,\n \"stddev_ts\": 0.213001,\n \"samples_ns\": [ 21103828844, 20779041039, 20797197305 ],\n \"samples_ts\": [ 24.261, 24.6402, 24.6187 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:32:37Z", - "avg_ns": 4710023117, - "stddev_ns": 1688910, - "avg_ts": 108.704359, - "stddev_ts": 0.038955, - "samples_ns": [ - 4711144020, - 4708082307, - 4710843025 - ], - "samples_ts": [ - 108.678, - 108.749, - 108.685 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:32:56Z", - "avg_ns": 20893355729, - "stddev_ns": 182501047, - "avg_ts": 24.506638, - "stddev_ts": 0.213001, - "samples_ns": [ - 21103828844, - 20779041039, - 20797197305 - ], - "samples_ts": [ - 24.261, - 24.6402, - 24.6187 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 35 - }, - { - "timestamp_utc": "2025-12-08T20:34:11.059789+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:34:00Z\",\n \"avg_ns\": 580767350,\n \"stddev_ns\": 2912625,\n \"avg_ts\": 220.401768,\n \"stddev_ts\": 1.107880,\n \"samples_ns\": [ 581813345, 583012322, 577476385 ],\n \"samples_ts\": [ 220.002, 219.549, 221.654 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:34:02Z\",\n \"avg_ns\": 2829500076,\n \"stddev_ns\": 8125641,\n \"avg_ts\": 45.237922,\n \"stddev_ts\": 0.130033,\n \"samples_ns\": [ 2836487937, 2820584178, 2831428115 ],\n \"samples_ts\": [ 45.1262, 45.3807, 45.2069 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:34:00Z", - "avg_ns": 580767350, - "stddev_ns": 2912625, - "avg_ts": 220.401768, - "stddev_ts": 1.10788, - "samples_ns": [ - 581813345, - 583012322, - 577476385 - ], - "samples_ts": [ - 220.002, - 219.549, - 221.654 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:34:02Z", - "avg_ns": 2829500076, - "stddev_ns": 8125641, - "avg_ts": 45.237922, - "stddev_ts": 0.130033, - "samples_ns": [ - 2836487937, - 2820584178, - 2831428115 - ], - "samples_ts": [ - 45.1262, - 45.3807, - 45.2069 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 36 - }, - { - "timestamp_utc": "2025-12-08T20:34:48.554882+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:34:11Z\",\n \"avg_ns\": 578781264,\n \"stddev_ns\": 1844187,\n \"avg_ts\": 221.155859,\n \"stddev_ts\": 0.705782,\n \"samples_ns\": [ 579491049, 580165101, 576687642 ],\n \"samples_ts\": [ 220.883, 220.627, 221.957 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:34:13Z\",\n \"avg_ns\": 11505129720,\n \"stddev_ns\": 18601575,\n \"avg_ts\": 44.501966,\n \"stddev_ts\": 0.071898,\n \"samples_ns\": [ 11490509580, 11498812769, 11526066812 ],\n \"samples_ts\": [ 44.5585, 44.5263, 44.4211 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:34:11Z", - "avg_ns": 578781264, - "stddev_ns": 1844187, - "avg_ts": 221.155859, - "stddev_ts": 0.705782, - "samples_ns": [ - 579491049, - 580165101, - 576687642 - ], - "samples_ts": [ - 220.883, - 220.627, - 221.957 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:34:13Z", - "avg_ns": 11505129720, - "stddev_ns": 18601575, - "avg_ts": 44.501966, - "stddev_ts": 0.071898, - "samples_ns": [ - 11490509580, - 11498812769, - 11526066812 - ], - "samples_ts": [ - 44.5585, - 44.5263, - 44.4211 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 37 - }, - { - "timestamp_utc": "2025-12-08T20:35:07.542068+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:34:49Z\",\n \"avg_ns\": 2495113484,\n \"stddev_ns\": 321317579,\n \"avg_ts\": 207.335005,\n \"stddev_ts\": 24.855881,\n \"samples_ns\": [ 2866075263, 2303673927, 2315591262 ],\n \"samples_ts\": [ 178.642, 222.254, 221.11 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:34:58Z\",\n \"avg_ns\": 2839307833,\n \"stddev_ns\": 22263781,\n \"avg_ts\": 45.083260,\n \"stddev_ts\": 0.353867,\n \"samples_ns\": [ 2841177197, 2860577995, 2816168307 ],\n \"samples_ts\": [ 45.0517, 44.7462, 45.4518 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:34:49Z", - "avg_ns": 2495113484, - "stddev_ns": 321317579, - "avg_ts": 207.335005, - "stddev_ts": 24.855881, - "samples_ns": [ - 2866075263, - 2303673927, - 2315591262 - ], - "samples_ts": [ - 178.642, - 222.254, - 221.11 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:34:58Z", - "avg_ns": 2839307833, - "stddev_ns": 22263781, - "avg_ts": 45.08326, - "stddev_ts": 0.353867, - "samples_ns": [ - 2841177197, - 2860577995, - 2816168307 - ], - "samples_ts": [ - 45.0517, - 44.7462, - 45.4518 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 38 - }, - { - "timestamp_utc": "2025-12-08T20:35:55.234287+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:35:08Z\",\n \"avg_ns\": 2495123882,\n \"stddev_ns\": 326665224,\n \"avg_ts\": 207.403722,\n \"stddev_ts\": 25.245988,\n \"samples_ns\": [ 2303948322, 2872312613, 2309110712 ],\n \"samples_ts\": [ 222.227, 178.254, 221.73 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:35:17Z\",\n \"avg_ns\": 12409318514,\n \"stddev_ns\": 286745338,\n \"avg_ts\": 41.274203,\n \"stddev_ts\": 0.966327,\n \"samples_ns\": [ 12079101620, 12553439679, 12595414245 ],\n \"samples_ts\": [ 42.3873, 40.7856, 40.6497 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:35:08Z", - "avg_ns": 2495123882, - "stddev_ns": 326665224, - "avg_ts": 207.403722, - "stddev_ts": 25.245988, - "samples_ns": [ - 2303948322, - 2872312613, - 2309110712 - ], - "samples_ts": [ - 222.227, - 178.254, - 221.73 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:35:17Z", - "avg_ns": 12409318514, - "stddev_ns": 286745338, - "avg_ts": 41.274203, - "stddev_ts": 0.966327, - "samples_ns": [ - 12079101620, - 12553439679, - 12595414245 - ], - "samples_ts": [ - 42.3873, - 40.7856, - 40.6497 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 39 - }, - { - "timestamp_utc": "2025-12-08T20:36:06.702084+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:35:55Z\",\n \"avg_ns\": 579298315,\n \"stddev_ns\": 1073220,\n \"avg_ts\": 220.957474,\n \"stddev_ts\": 0.408918,\n \"samples_ns\": [ 578734784, 578624242, 580535919 ],\n \"samples_ts\": [ 221.172, 221.214, 220.486 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:35:58Z\",\n \"avg_ns\": 2826761637,\n \"stddev_ns\": 10793691,\n \"avg_ts\": 45.281939,\n \"stddev_ts\": 0.173282,\n \"samples_ns\": [ 2814298665, 2833055207, 2832931041 ],\n \"samples_ts\": [ 45.482, 45.1809, 45.1829 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:35:55Z", - "avg_ns": 579298315, - "stddev_ns": 1073220, - "avg_ts": 220.957474, - "stddev_ts": 0.408918, - "samples_ns": [ - 578734784, - 578624242, - 580535919 - ], - "samples_ts": [ - 221.172, - 221.214, - 220.486 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:35:58Z", - "avg_ns": 2826761637, - "stddev_ns": 10793691, - "avg_ts": 45.281939, - "stddev_ts": 0.173282, - "samples_ns": [ - 2814298665, - 2833055207, - 2832931041 - ], - "samples_ts": [ - 45.482, - 45.1809, - 45.1829 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 40 - }, - { - "timestamp_utc": "2025-12-08T20:36:44.329039+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:36:07Z\",\n \"avg_ns\": 575681902,\n \"stddev_ns\": 1393883,\n \"avg_ts\": 222.345881,\n \"stddev_ts\": 0.538306,\n \"samples_ns\": [ 577036850, 574252865, 575755993 ],\n \"samples_ts\": [ 221.823, 222.898, 222.316 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:36:09Z\",\n \"avg_ns\": 11546315546,\n \"stddev_ns\": 33553157,\n \"avg_ts\": 44.343399,\n \"stddev_ts\": 0.128752,\n \"samples_ns\": [ 11516647930, 11582728910, 11539569799 ],\n \"samples_ts\": [ 44.4574, 44.2037, 44.3691 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:36:07Z", - "avg_ns": 575681902, - "stddev_ns": 1393883, - "avg_ts": 222.345881, - "stddev_ts": 0.538306, - "samples_ns": [ - 577036850, - 574252865, - 575755993 - ], - "samples_ts": [ - 221.823, - 222.898, - 222.316 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:36:09Z", - "avg_ns": 11546315546, - "stddev_ns": 33553157, - "avg_ts": 44.343399, - "stddev_ts": 0.128752, - "samples_ns": [ - 11516647930, - 11582728910, - 11539569799 - ], - "samples_ts": [ - 44.4574, - 44.2037, - 44.3691 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 41 - }, - { - "timestamp_utc": "2025-12-08T20:37:02.682041+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:36:44Z\",\n \"avg_ns\": 2314727097,\n \"stddev_ns\": 8431718,\n \"avg_ts\": 221.194337,\n \"stddev_ts\": 0.804982,\n \"samples_ns\": [ 2307140607, 2323805014, 2313235670 ],\n \"samples_ts\": [ 221.92, 220.328, 221.335 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:36:54Z\",\n \"avg_ns\": 2812376549,\n \"stddev_ns\": 6387873,\n \"avg_ts\": 45.513265,\n \"stddev_ts\": 0.103325,\n \"samples_ns\": [ 2806476069, 2811493989, 2819159590 ],\n \"samples_ts\": [ 45.6088, 45.5274, 45.4036 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:36:44Z", - "avg_ns": 2314727097, - "stddev_ns": 8431718, - "avg_ts": 221.194337, - "stddev_ts": 0.804982, - "samples_ns": [ - 2307140607, - 2323805014, - 2313235670 - ], - "samples_ts": [ - 221.92, - 220.328, - 221.335 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:36:54Z", - "avg_ns": 2812376549, - "stddev_ns": 6387873, - "avg_ts": 45.513265, - "stddev_ts": 0.103325, - "samples_ns": [ - 2806476069, - 2811493989, - 2819159590 - ], - "samples_ts": [ - 45.6088, - 45.5274, - 45.4036 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 42 - }, - { - "timestamp_utc": "2025-12-08T20:37:48.745690+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:37:03Z\",\n \"avg_ns\": 2318546910,\n \"stddev_ns\": 6334673,\n \"avg_ts\": 220.829065,\n \"stddev_ts\": 0.602760,\n \"samples_ns\": [ 2325528650, 2313166709, 2316945371 ],\n \"samples_ts\": [ 220.165, 221.342, 220.981 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:37:12Z\",\n \"avg_ns\": 12030804541,\n \"stddev_ns\": 94134618,\n \"avg_ts\": 42.559164,\n \"stddev_ts\": 0.334419,\n \"samples_ns\": [ 11922875468, 12073598199, 12095939958 ],\n \"samples_ts\": [ 42.9427, 42.4066, 42.3283 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:37:03Z", - "avg_ns": 2318546910, - "stddev_ns": 6334673, - "avg_ts": 220.829065, - "stddev_ts": 0.60276, - "samples_ns": [ - 2325528650, - 2313166709, - 2316945371 - ], - "samples_ts": [ - 220.165, - 221.342, - 220.981 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:37:12Z", - "avg_ns": 12030804541, - "stddev_ns": 94134618, - "avg_ts": 42.559164, - "stddev_ts": 0.334419, - "samples_ns": [ - 11922875468, - 12073598199, - 12095939958 - ], - "samples_ts": [ - 42.9427, - 42.4066, - 42.3283 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 43 - }, - { - "timestamp_utc": "2025-12-08T20:38:00.133570+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:37:49Z\",\n \"avg_ns\": 577026138,\n \"stddev_ns\": 1523204,\n \"avg_ts\": 221.828071,\n \"stddev_ts\": 0.586314,\n \"samples_ns\": [ 577953879, 577855896, 575268641 ],\n \"samples_ts\": [ 221.471, 221.509, 222.505 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:37:51Z\",\n \"avg_ns\": 2808192172,\n \"stddev_ns\": 5508978,\n \"avg_ts\": 45.581043,\n \"stddev_ts\": 0.089354,\n \"samples_ns\": [ 2806837229, 2803487346, 2814251942 ],\n \"samples_ts\": [ 45.6029, 45.6574, 45.4828 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:37:49Z", - "avg_ns": 577026138, - "stddev_ns": 1523204, - "avg_ts": 221.828071, - "stddev_ts": 0.586314, - "samples_ns": [ - 577953879, - 577855896, - 575268641 - ], - "samples_ts": [ - 221.471, - 221.509, - 222.505 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:37:51Z", - "avg_ns": 2808192172, - "stddev_ns": 5508978, - "avg_ts": 45.581043, - "stddev_ts": 0.089354, - "samples_ns": [ - 2806837229, - 2803487346, - 2814251942 - ], - "samples_ts": [ - 45.6029, - 45.6574, - 45.4828 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 44 - }, - { - "timestamp_utc": "2025-12-08T20:38:38.847070+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:38:01Z\",\n \"avg_ns\": 580898072,\n \"stddev_ns\": 1070207,\n \"avg_ts\": 220.348966,\n \"stddev_ts\": 0.405617,\n \"samples_ns\": [ 582103427, 580531367, 580059422 ],\n \"samples_ts\": [ 219.892, 220.488, 220.667 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:38:03Z\",\n \"avg_ns\": 11698621994,\n \"stddev_ns\": 354392332,\n \"avg_ts\": 43.792174,\n \"stddev_ts\": 1.304017,\n \"samples_ns\": [ 11509644182, 11478771171, 12107450629 ],\n \"samples_ts\": [ 44.4844, 44.6041, 42.288 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:38:01Z", - "avg_ns": 580898072, - "stddev_ns": 1070207, - "avg_ts": 220.348966, - "stddev_ts": 0.405617, - "samples_ns": [ - 582103427, - 580531367, - 580059422 - ], - "samples_ts": [ - 219.892, - 220.488, - 220.667 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:38:03Z", - "avg_ns": 11698621994, - "stddev_ns": 354392332, - "avg_ts": 43.792174, - "stddev_ts": 1.304017, - "samples_ns": [ - 11509644182, - 11478771171, - 12107450629 - ], - "samples_ts": [ - 44.4844, - 44.6041, - 42.288 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 45 - }, - { - "timestamp_utc": "2025-12-08T20:38:57.637985+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:38:39Z\",\n \"avg_ns\": 2434655743,\n \"stddev_ns\": 3756136,\n \"avg_ts\": 210.297005,\n \"stddev_ts\": 0.324201,\n \"samples_ns\": [ 2431801564, 2433254616, 2438911049 ],\n \"samples_ts\": [ 210.543, 210.418, 209.93 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:38:49Z\",\n \"avg_ns\": 2795762348,\n \"stddev_ns\": 3319000,\n \"avg_ts\": 45.783620,\n \"stddev_ts\": 0.054376,\n \"samples_ns\": [ 2797004575, 2792001868, 2798280602 ],\n \"samples_ts\": [ 45.7632, 45.8452, 45.7424 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:38:39Z", - "avg_ns": 2434655743, - "stddev_ns": 3756136, - "avg_ts": 210.297005, - "stddev_ts": 0.324201, - "samples_ns": [ - 2431801564, - 2433254616, - 2438911049 - ], - "samples_ts": [ - 210.543, - 210.418, - 209.93 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:38:49Z", - "avg_ns": 2795762348, - "stddev_ns": 3319000, - "avg_ts": 45.78362, - "stddev_ts": 0.054376, - "samples_ns": [ - 2797004575, - 2792001868, - 2798280602 - ], - "samples_ts": [ - 45.7632, - 45.8452, - 45.7424 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 46 - }, - { - "timestamp_utc": "2025-12-08T20:39:43.051267+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:38:58Z\",\n \"avg_ns\": 2450131270,\n \"stddev_ns\": 5418226,\n \"avg_ts\": 208.969076,\n \"stddev_ts\": 0.461508,\n \"samples_ns\": [ 2447501652, 2456361984, 2446530176 ],\n \"samples_ts\": [ 209.193, 208.438, 209.276 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:39:08Z\",\n \"avg_ns\": 11644079200,\n \"stddev_ns\": 343732728,\n \"avg_ts\": 43.995986,\n \"stddev_ts\": 1.277523,\n \"samples_ns\": [ 11470962645, 12039951529, 11421323426 ],\n \"samples_ts\": [ 44.6344, 42.5251, 44.8284 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:38:58Z", - "avg_ns": 2450131270, - "stddev_ns": 5418226, - "avg_ts": 208.969076, - "stddev_ts": 0.461508, - "samples_ns": [ - 2447501652, - 2456361984, - 2446530176 - ], - "samples_ts": [ - 209.193, - 208.438, - 209.276 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:39:08Z", - "avg_ns": 11644079200, - "stddev_ns": 343732728, - "avg_ts": 43.995986, - "stddev_ts": 1.277523, - "samples_ns": [ - 11470962645, - 12039951529, - 11421323426 - ], - "samples_ts": [ - 44.6344, - 42.5251, - 44.8284 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 47 - }, - { - "timestamp_utc": "2025-12-08T20:39:54.452093+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:39:43Z\",\n \"avg_ns\": 574078281,\n \"stddev_ns\": 4711970,\n \"avg_ts\": 222.976088,\n \"stddev_ts\": 1.823903,\n \"samples_ns\": [ 579343625, 570258751, 572632469 ],\n \"samples_ts\": [ 220.94, 224.46, 223.529 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:39:45Z\",\n \"avg_ns\": 2812472049,\n \"stddev_ns\": 24740222,\n \"avg_ts\": 45.513900,\n \"stddev_ts\": 0.398370,\n \"samples_ns\": [ 2799520355, 2796896457, 2840999336 ],\n \"samples_ts\": [ 45.7221, 45.765, 45.0546 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:39:43Z", - "avg_ns": 574078281, - "stddev_ns": 4711970, - "avg_ts": 222.976088, - "stddev_ts": 1.823903, - "samples_ns": [ - 579343625, - 570258751, - 572632469 - ], - "samples_ts": [ - 220.94, - 224.46, - 223.529 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:39:45Z", - "avg_ns": 2812472049, - "stddev_ns": 24740222, - "avg_ts": 45.5139, - "stddev_ts": 0.39837, - "samples_ns": [ - 2799520355, - 2796896457, - 2840999336 - ], - "samples_ts": [ - 45.7221, - 45.765, - 45.0546 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 48 - }, - { - "timestamp_utc": "2025-12-08T20:40:32.473715+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:39:55Z\",\n \"avg_ns\": 574382451,\n \"stddev_ns\": 1084571,\n \"avg_ts\": 222.848563,\n \"stddev_ts\": 0.420232,\n \"samples_ns\": [ 575633970, 573725117, 573788267 ],\n \"samples_ts\": [ 222.364, 223.103, 223.079 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:39:57Z\",\n \"avg_ns\": 11500321390,\n \"stddev_ns\": 21943769,\n \"avg_ts\": 44.520603,\n \"stddev_ts\": 0.085039,\n \"samples_ns\": [ 11514817319, 11475075688, 11511071164 ],\n \"samples_ts\": [ 44.4644, 44.6184, 44.4789 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:39:55Z", - "avg_ns": 574382451, - "stddev_ns": 1084571, - "avg_ts": 222.848563, - "stddev_ts": 0.420232, - "samples_ns": [ - 575633970, - 573725117, - 573788267 - ], - "samples_ts": [ - 222.364, - 223.103, - 223.079 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:39:57Z", - "avg_ns": 11500321390, - "stddev_ns": 21943769, - "avg_ts": 44.520603, - "stddev_ts": 0.085039, - "samples_ns": [ - 11514817319, - 11475075688, - 11511071164 - ], - "samples_ts": [ - 44.4644, - 44.6184, - 44.4789 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 49 - }, - { - "timestamp_utc": "2025-12-08T20:40:51.446185+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:40:33Z\",\n \"avg_ns\": 2309078230,\n \"stddev_ns\": 2894093,\n \"avg_ts\": 221.733733,\n \"stddev_ts\": 0.277688,\n \"samples_ns\": [ 2307029417, 2307816759, 2312388515 ],\n \"samples_ts\": [ 221.93, 221.855, 221.416 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:40:42Z\",\n \"avg_ns\": 3025721130,\n \"stddev_ns\": 359084082,\n \"avg_ts\": 42.679005,\n \"stddev_ts\": 4.740253,\n \"samples_ns\": [ 2817134518, 2819675755, 3440353117 ],\n \"samples_ts\": [ 45.4362, 45.3953, 37.2055 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:40:33Z", - "avg_ns": 2309078230, - "stddev_ns": 2894093, - "avg_ts": 221.733733, - "stddev_ts": 0.277688, - "samples_ns": [ - 2307029417, - 2307816759, - 2312388515 - ], - "samples_ts": [ - 221.93, - 221.855, - 221.416 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:40:42Z", - "avg_ns": 3025721130, - "stddev_ns": 359084082, - "avg_ts": 42.679005, - "stddev_ts": 4.740253, - "samples_ns": [ - 2817134518, - 2819675755, - 3440353117 - ], - "samples_ts": [ - 45.4362, - 45.3953, - 37.2055 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 50 - }, - { - "timestamp_utc": "2025-12-08T20:41:36.536982+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:40:52Z\",\n \"avg_ns\": 2306956797,\n \"stddev_ns\": 18431768,\n \"avg_ts\": 221.946806,\n \"stddev_ts\": 1.765361,\n \"samples_ns\": [ 2297812534, 2294885173, 2328172685 ],\n \"samples_ts\": [ 222.821, 223.105, 219.915 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:41:01Z\",\n \"avg_ns\": 11722638475,\n \"stddev_ns\": 389637732,\n \"avg_ts\": 43.707763,\n \"stddev_ts\": 1.425581,\n \"samples_ns\": [ 12172247366, 11483466576, 11512201485 ],\n \"samples_ts\": [ 42.0629, 44.5858, 44.4746 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:40:52Z", - "avg_ns": 2306956797, - "stddev_ns": 18431768, - "avg_ts": 221.946806, - "stddev_ts": 1.765361, - "samples_ns": [ - 2297812534, - 2294885173, - 2328172685 - ], - "samples_ts": [ - 222.821, - 223.105, - 219.915 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:41:01Z", - "avg_ns": 11722638475, - "stddev_ns": 389637732, - "avg_ts": 43.707763, - "stddev_ts": 1.425581, - "samples_ns": [ - 12172247366, - 11483466576, - 11512201485 - ], - "samples_ts": [ - 42.0629, - 44.5858, - 44.4746 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 51 - }, - { - "timestamp_utc": "2025-12-08T20:41:47.875923+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:41:37Z\",\n \"avg_ns\": 574911077,\n \"stddev_ns\": 774221,\n \"avg_ts\": 222.643396,\n \"stddev_ts\": 0.299598,\n \"samples_ns\": [ 574432583, 575804313, 574496335 ],\n \"samples_ts\": [ 222.829, 222.298, 222.804 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:41:39Z\",\n \"avg_ns\": 2797233224,\n \"stddev_ns\": 12142559,\n \"avg_ts\": 45.760076,\n \"stddev_ts\": 0.198201,\n \"samples_ns\": [ 2792260845, 2788366073, 2811072754 ],\n \"samples_ts\": [ 45.841, 45.905, 45.5342 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:41:37Z", - "avg_ns": 574911077, - "stddev_ns": 774221, - "avg_ts": 222.643396, - "stddev_ts": 0.299598, - "samples_ns": [ - 574432583, - 575804313, - 574496335 - ], - "samples_ts": [ - 222.829, - 222.298, - 222.804 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:41:39Z", - "avg_ns": 2797233224, - "stddev_ns": 12142559, - "avg_ts": 45.760076, - "stddev_ts": 0.198201, - "samples_ns": [ - 2792260845, - 2788366073, - 2811072754 - ], - "samples_ts": [ - 45.841, - 45.905, - 45.5342 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 52 - }, - { - "timestamp_utc": "2025-12-08T20:42:26.047329+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:41:48Z\",\n \"avg_ns\": 581809055,\n \"stddev_ns\": 1008654,\n \"avg_ts\": 220.003892,\n \"stddev_ts\": 0.381573,\n \"samples_ns\": [ 580683707, 582630565, 582112894 ],\n \"samples_ts\": [ 220.43, 219.693, 219.889 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:41:50Z\",\n \"avg_ns\": 11728586943,\n \"stddev_ns\": 356092953,\n \"avg_ts\": 43.680418,\n \"stddev_ts\": 1.304083,\n \"samples_ns\": [ 11553502440, 12138326733, 11493931656 ],\n \"samples_ts\": [ 44.3156, 42.1804, 44.5452 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:41:48Z", - "avg_ns": 581809055, - "stddev_ns": 1008654, - "avg_ts": 220.003892, - "stddev_ts": 0.381573, - "samples_ns": [ - 580683707, - 582630565, - 582112894 - ], - "samples_ts": [ - 220.43, - 219.693, - 219.889 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:41:50Z", - "avg_ns": 11728586943, - "stddev_ns": 356092953, - "avg_ts": 43.680418, - "stddev_ts": 1.304083, - "samples_ns": [ - 11553502440, - 12138326733, - 11493931656 - ], - "samples_ts": [ - 44.3156, - 42.1804, - 44.5452 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 53 - }, - { - "timestamp_utc": "2025-12-08T20:42:45.152129+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:42:26Z\",\n \"avg_ns\": 2319984563,\n \"stddev_ns\": 4780435,\n \"avg_ts\": 220.691748,\n \"stddev_ts\": 0.454955,\n \"samples_ns\": [ 2314819557, 2320881850, 2324252284 ],\n \"samples_ts\": [ 221.184, 220.606, 220.286 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:42:35Z\",\n \"avg_ns\": 3053198896,\n \"stddev_ns\": 17575620,\n \"avg_ts\": 41.924167,\n \"stddev_ts\": 0.240686,\n \"samples_ns\": [ 3039641571, 3046899005, 3073056113 ],\n \"samples_ts\": [ 42.1102, 42.0099, 41.6523 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:42:26Z", - "avg_ns": 2319984563, - "stddev_ns": 4780435, - "avg_ts": 220.691748, - "stddev_ts": 0.454955, - "samples_ns": [ - 2314819557, - 2320881850, - 2324252284 - ], - "samples_ts": [ - 221.184, - 220.606, - 220.286 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:42:35Z", - "avg_ns": 3053198896, - "stddev_ns": 17575620, - "avg_ts": 41.924167, - "stddev_ts": 0.240686, - "samples_ns": [ - 3039641571, - 3046899005, - 3073056113 - ], - "samples_ts": [ - 42.1102, - 42.0099, - 41.6523 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 54 - }, - { - "timestamp_utc": "2025-12-08T20:43:32.586467+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:42:45Z\",\n \"avg_ns\": 2327831447,\n \"stddev_ns\": 2538417,\n \"avg_ts\": 219.947370,\n \"stddev_ts\": 0.239737,\n \"samples_ns\": [ 2330665010, 2327064092, 2325765239 ],\n \"samples_ts\": [ 219.68, 220.02, 220.143 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:42:55Z\",\n \"avg_ns\": 12481148335,\n \"stddev_ns\": 530681227,\n \"avg_ts\": 41.070956,\n \"stddev_ts\": 1.732281,\n \"samples_ns\": [ 11992199896, 12405748152, 13045496957 ],\n \"samples_ts\": [ 42.6944, 41.2712, 39.2473 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:42:45Z", - "avg_ns": 2327831447, - "stddev_ns": 2538417, - "avg_ts": 219.94737, - "stddev_ts": 0.239737, - "samples_ns": [ - 2330665010, - 2327064092, - 2325765239 - ], - "samples_ts": [ - 219.68, - 220.02, - 220.143 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:42:55Z", - "avg_ns": 12481148335, - "stddev_ns": 530681227, - "avg_ts": 41.070956, - "stddev_ts": 1.732281, - "samples_ns": [ - 11992199896, - 12405748152, - 13045496957 - ], - "samples_ts": [ - 42.6944, - 41.2712, - 39.2473 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 55 - }, - { - "timestamp_utc": "2025-12-08T20:43:44.594549+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:43:33Z\",\n \"avg_ns\": 574684854,\n \"stddev_ns\": 2173142,\n \"avg_ts\": 222.732895,\n \"stddev_ts\": 0.843188,\n \"samples_ns\": [ 576586156, 575152217, 572316190 ],\n \"samples_ts\": [ 221.996, 222.55, 223.653 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:43:35Z\",\n \"avg_ns\": 3010816385,\n \"stddev_ns\": 346673238,\n \"avg_ts\": 42.868726,\n \"stddev_ts\": 4.629177,\n \"samples_ns\": [ 3411014200, 2818693102, 2802741855 ],\n \"samples_ts\": [ 37.5255, 45.4111, 45.6696 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:43:33Z", - "avg_ns": 574684854, - "stddev_ns": 2173142, - "avg_ts": 222.732895, - "stddev_ts": 0.843188, - "samples_ns": [ - 576586156, - 575152217, - 572316190 - ], - "samples_ts": [ - 221.996, - 222.55, - 223.653 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:43:35Z", - "avg_ns": 3010816385, - "stddev_ns": 346673238, - "avg_ts": 42.868726, - "stddev_ts": 4.629177, - "samples_ns": [ - 3411014200, - 2818693102, - 2802741855 - ], - "samples_ts": [ - 37.5255, - 45.4111, - 45.6696 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 56 - }, - { - "timestamp_utc": "2025-12-08T20:44:22.918089+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:43:45Z\",\n \"avg_ns\": 577242890,\n \"stddev_ns\": 1110237,\n \"avg_ts\": 221.744292,\n \"stddev_ts\": 0.426897,\n \"samples_ns\": [ 577673151, 575981918, 578073601 ],\n \"samples_ts\": [ 221.579, 222.229, 221.425 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:43:47Z\",\n \"avg_ns\": 11778182808,\n \"stddev_ns\": 512397093,\n \"avg_ts\": 43.523777,\n \"stddev_ts\": 1.847199,\n \"samples_ns\": [ 11469638282, 12369663220, 11495246924 ],\n \"samples_ts\": [ 44.6396, 41.3916, 44.5401 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:43:45Z", - "avg_ns": 577242890, - "stddev_ns": 1110237, - "avg_ts": 221.744292, - "stddev_ts": 0.426897, - "samples_ns": [ - 577673151, - 575981918, - 578073601 - ], - "samples_ts": [ - 221.579, - 222.229, - 221.425 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:43:47Z", - "avg_ns": 11778182808, - "stddev_ns": 512397093, - "avg_ts": 43.523777, - "stddev_ts": 1.847199, - "samples_ns": [ - 11469638282, - 12369663220, - 11495246924 - ], - "samples_ts": [ - 44.6396, - 41.3916, - 44.5401 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 57 - }, - { - "timestamp_utc": "2025-12-08T20:44:41.804635+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:44:23Z\",\n \"avg_ns\": 2445445614,\n \"stddev_ns\": 5331366,\n \"avg_ts\": 209.369458,\n \"stddev_ts\": 0.456733,\n \"samples_ns\": [ 2446488672, 2439669802, 2450178368 ],\n \"samples_ts\": [ 209.28, 209.864, 208.964 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:44:33Z\",\n \"avg_ns\": 2800146562,\n \"stddev_ns\": 6704877,\n \"avg_ts\": 45.712068,\n \"stddev_ts\": 0.109509,\n \"samples_ns\": [ 2801172314, 2792988338, 2806279036 ],\n \"samples_ts\": [ 45.6952, 45.829, 45.612 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:44:23Z", - "avg_ns": 2445445614, - "stddev_ns": 5331366, - "avg_ts": 209.369458, - "stddev_ts": 0.456733, - "samples_ns": [ - 2446488672, - 2439669802, - 2450178368 - ], - "samples_ts": [ - 209.28, - 209.864, - 208.964 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:44:33Z", - "avg_ns": 2800146562, - "stddev_ns": 6704877, - "avg_ts": 45.712068, - "stddev_ts": 0.109509, - "samples_ns": [ - 2801172314, - 2792988338, - 2806279036 - ], - "samples_ts": [ - 45.6952, - 45.829, - 45.612 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 58 - }, - { - "timestamp_utc": "2025-12-08T20:45:30.042080+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:44:42Z\",\n \"avg_ns\": 2444604195,\n \"stddev_ns\": 9017875,\n \"avg_ts\": 209.442760,\n \"stddev_ts\": 0.773439,\n \"samples_ns\": [ 2446413768, 2434818746, 2452580071 ],\n \"samples_ts\": [ 209.286, 210.283, 208.76 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:44:52Z\",\n \"avg_ns\": 12595189375,\n \"stddev_ns\": 295210400,\n \"avg_ts\": 40.665175,\n \"stddev_ts\": 0.943007,\n \"samples_ns\": [ 12491449583, 12365851680, 12928266863 ],\n \"samples_ts\": [ 40.988, 41.4043, 39.6031 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:44:42Z", - "avg_ns": 2444604195, - "stddev_ns": 9017875, - "avg_ts": 209.44276, - "stddev_ts": 0.773439, - "samples_ns": [ - 2446413768, - 2434818746, - 2452580071 - ], - "samples_ts": [ - 209.286, - 210.283, - 208.76 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:44:52Z", - "avg_ns": 12595189375, - "stddev_ns": 295210400, - "avg_ts": 40.665175, - "stddev_ts": 0.943007, - "samples_ns": [ - 12491449583, - 12365851680, - 12928266863 - ], - "samples_ts": [ - 40.988, - 41.4043, - 39.6031 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 59 - }, - { - "timestamp_utc": "2025-12-08T20:45:42.050605+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:45:30Z\",\n \"avg_ns\": 577283298,\n \"stddev_ns\": 3080407,\n \"avg_ts\": 221.732426,\n \"stddev_ts\": 1.181631,\n \"samples_ns\": [ 580581479, 576787143, 574481274 ],\n \"samples_ts\": [ 220.469, 221.919, 222.81 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:45:32Z\",\n \"avg_ns\": 3010717208,\n \"stddev_ns\": 372379337,\n \"avg_ts\": 42.923368,\n \"stddev_ts\": 4.955118,\n \"samples_ns\": [ 2795687544, 2795760253, 3440703827 ],\n \"samples_ts\": [ 45.7848, 45.7836, 37.2017 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:45:30Z", - "avg_ns": 577283298, - "stddev_ns": 3080407, - "avg_ts": 221.732426, - "stddev_ts": 1.181631, - "samples_ns": [ - 580581479, - 576787143, - 574481274 - ], - "samples_ts": [ - 220.469, - 221.919, - 222.81 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:45:32Z", - "avg_ns": 3010717208, - "stddev_ns": 372379337, - "avg_ts": 42.923368, - "stddev_ts": 4.955118, - "samples_ns": [ - 2795687544, - 2795760253, - 3440703827 - ], - "samples_ts": [ - 45.7848, - 45.7836, - 37.2017 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 60 - }, - { - "timestamp_utc": "2025-12-08T20:46:19.746452+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:45:42Z\",\n \"avg_ns\": 576218892,\n \"stddev_ns\": 244991,\n \"avg_ts\": 222.137832,\n \"stddev_ts\": 0.094465,\n \"samples_ns\": [ 576412717, 575943528, 576300431 ],\n \"samples_ts\": [ 222.063, 222.244, 222.106 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:45:44Z\",\n \"avg_ns\": 11573463981,\n \"stddev_ns\": 50717554,\n \"avg_ts\": 44.239700,\n \"stddev_ts\": 0.194334,\n \"samples_ns\": [ 11597073058, 11608073115, 11515245770 ],\n \"samples_ts\": [ 44.1491, 44.1072, 44.4628 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:45:42Z", - "avg_ns": 576218892, - "stddev_ns": 244991, - "avg_ts": 222.137832, - "stddev_ts": 0.094465, - "samples_ns": [ - 576412717, - 575943528, - 576300431 - ], - "samples_ts": [ - 222.063, - 222.244, - 222.106 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:45:44Z", - "avg_ns": 11573463981, - "stddev_ns": 50717554, - "avg_ts": 44.2397, - "stddev_ts": 0.194334, - "samples_ns": [ - 11597073058, - 11608073115, - 11515245770 - ], - "samples_ts": [ - 44.1491, - 44.1072, - 44.4628 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 61 - }, - { - "timestamp_utc": "2025-12-08T20:46:38.131095+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:46:20Z\",\n \"avg_ns\": 2315897818,\n \"stddev_ns\": 11956558,\n \"avg_ts\": 221.084505,\n \"stddev_ts\": 1.144390,\n \"samples_ns\": [ 2320652794, 2324745389, 2302295271 ],\n \"samples_ts\": [ 220.628, 220.239, 222.387 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:46:29Z\",\n \"avg_ns\": 2815069039,\n \"stddev_ns\": 961272,\n \"avg_ts\": 45.469581,\n \"stddev_ts\": 0.015506,\n \"samples_ns\": [ 2813961085, 2815599404, 2815646629 ],\n \"samples_ts\": [ 45.4875, 45.461, 45.4603 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:46:20Z", - "avg_ns": 2315897818, - "stddev_ns": 11956558, - "avg_ts": 221.084505, - "stddev_ts": 1.14439, - "samples_ns": [ - 2320652794, - 2324745389, - 2302295271 - ], - "samples_ts": [ - 220.628, - 220.239, - 222.387 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:46:29Z", - "avg_ns": 2815069039, - "stddev_ns": 961272, - "avg_ts": 45.469581, - "stddev_ts": 0.015506, - "samples_ns": [ - 2813961085, - 2815599404, - 2815646629 - ], - "samples_ts": [ - 45.4875, - 45.461, - 45.4603 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 62 - }, - { - "timestamp_utc": "2025-12-08T20:47:23.079311+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:46:38Z\",\n \"avg_ns\": 2493109732,\n \"stddev_ns\": 329162382,\n \"avg_ts\": 207.607782,\n \"stddev_ts\": 25.469140,\n \"samples_ns\": [ 2301190453, 2304951243, 2873187502 ],\n \"samples_ts\": [ 222.494, 222.131, 178.199 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:46:48Z\",\n \"avg_ns\": 11493996197,\n \"stddev_ns\": 57881233,\n \"avg_ts\": 44.545747,\n \"stddev_ts\": 0.224012,\n \"samples_ns\": [ 11442464073, 11556620851, 11482903668 ],\n \"samples_ts\": [ 44.7456, 44.3036, 44.588 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:46:38Z", - "avg_ns": 2493109732, - "stddev_ns": 329162382, - "avg_ts": 207.607782, - "stddev_ts": 25.46914, - "samples_ns": [ - 2301190453, - 2304951243, - 2873187502 - ], - "samples_ts": [ - 222.494, - 222.131, - 178.199 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:46:48Z", - "avg_ns": 11493996197, - "stddev_ns": 57881233, - "avg_ts": 44.545747, - "stddev_ts": 0.224012, - "samples_ns": [ - 11442464073, - 11556620851, - 11482903668 - ], - "samples_ts": [ - 44.7456, - 44.3036, - 44.588 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 63 - }, - { - "timestamp_utc": "2025-12-08T20:47:34.446734+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:47:23Z\",\n \"avg_ns\": 577192485,\n \"stddev_ns\": 1301344,\n \"avg_ts\": 221.763861,\n \"stddev_ts\": 0.500038,\n \"samples_ns\": [ 575873939, 577227598, 578475918 ],\n \"samples_ts\": [ 222.271, 221.75, 221.271 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:47:25Z\",\n \"avg_ns\": 2797604861,\n \"stddev_ns\": 9714213,\n \"avg_ts\": 45.753792,\n \"stddev_ts\": 0.159083,\n \"samples_ns\": [ 2786835347, 2800273166, 2805706070 ],\n \"samples_ts\": [ 45.9302, 45.7098, 45.6213 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:47:23Z", - "avg_ns": 577192485, - "stddev_ns": 1301344, - "avg_ts": 221.763861, - "stddev_ts": 0.500038, - "samples_ns": [ - 575873939, - 577227598, - 578475918 - ], - "samples_ts": [ - 222.271, - 221.75, - 221.271 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:47:25Z", - "avg_ns": 2797604861, - "stddev_ns": 9714213, - "avg_ts": 45.753792, - "stddev_ts": 0.159083, - "samples_ns": [ - 2786835347, - 2800273166, - 2805706070 - ], - "samples_ts": [ - 45.9302, - 45.7098, - 45.6213 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 64 - }, - { - "timestamp_utc": "2025-12-08T20:48:15.094791+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:47:35Z\",\n \"avg_ns\": 576542221,\n \"stddev_ns\": 1612437,\n \"avg_ts\": 222.014385,\n \"stddev_ts\": 0.620103,\n \"samples_ns\": [ 575962407, 578364387, 575299869 ],\n \"samples_ts\": [ 222.237, 221.314, 222.493 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:47:37Z\",\n \"avg_ns\": 12559838641,\n \"stddev_ns\": 595604802,\n \"avg_ts\": 40.825778,\n \"stddev_ts\": 1.927784,\n \"samples_ns\": [ 11988088843, 12514697386, 13176729695 ],\n \"samples_ts\": [ 42.7091, 40.9119, 38.8564 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:47:35Z", - "avg_ns": 576542221, - "stddev_ns": 1612437, - "avg_ts": 222.014385, - "stddev_ts": 0.620103, - "samples_ns": [ - 575962407, - 578364387, - 575299869 - ], - "samples_ts": [ - 222.237, - 221.314, - 222.493 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:47:37Z", - "avg_ns": 12559838641, - "stddev_ns": 595604802, - "avg_ts": 40.825778, - "stddev_ts": 1.927784, - "samples_ns": [ - 11988088843, - 12514697386, - 13176729695 - ], - "samples_ts": [ - 42.7091, - 40.9119, - 38.8564 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 65 - }, - { - "timestamp_utc": "2025-12-08T20:48:34.328136+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:48:15Z\",\n \"avg_ns\": 2352792412,\n \"stddev_ns\": 7211421,\n \"avg_ts\": 217.615124,\n \"stddev_ts\": 0.668051,\n \"samples_ns\": [ 2344547061, 2357920337, 2355909840 ],\n \"samples_ts\": [ 218.379, 217.14, 217.326 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:48:25Z\",\n \"avg_ns\": 3045824895,\n \"stddev_ns\": 17121234,\n \"avg_ts\": 42.025624,\n \"stddev_ts\": 0.235894,\n \"samples_ns\": [ 3042778653, 3030431340, 3064264693 ],\n \"samples_ts\": [ 42.0668, 42.2382, 41.7718 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:48:15Z", - "avg_ns": 2352792412, - "stddev_ns": 7211421, - "avg_ts": 217.615124, - "stddev_ts": 0.668051, - "samples_ns": [ - 2344547061, - 2357920337, - 2355909840 - ], - "samples_ts": [ - 218.379, - 217.14, - 217.326 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:48:25Z", - "avg_ns": 3045824895, - "stddev_ns": 17121234, - "avg_ts": 42.025624, - "stddev_ts": 0.235894, - "samples_ns": [ - 3042778653, - 3030431340, - 3064264693 - ], - "samples_ts": [ - 42.0668, - 42.2382, - 41.7718 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 66 - }, - { - "timestamp_utc": "2025-12-08T20:49:20.053725+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:48:34Z\",\n \"avg_ns\": 2519763154,\n \"stddev_ns\": 347982398,\n \"avg_ts\": 205.616121,\n \"stddev_ts\": 26.312423,\n \"samples_ns\": [ 2921335448, 2331081529, 2306872485 ],\n \"samples_ts\": [ 175.262, 219.641, 221.946 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:48:44Z\",\n \"avg_ns\": 11724050159,\n \"stddev_ns\": 377929624,\n \"avg_ts\": 43.700648,\n \"stddev_ts\": 1.383546,\n \"samples_ns\": [ 11532554627, 12159397538, 11480198312 ],\n \"samples_ts\": [ 44.3961, 42.1073, 44.5985 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:48:34Z", - "avg_ns": 2519763154, - "stddev_ns": 347982398, - "avg_ts": 205.616121, - "stddev_ts": 26.312423, - "samples_ns": [ - 2921335448, - 2331081529, - 2306872485 - ], - "samples_ts": [ - 175.262, - 219.641, - 221.946 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:48:44Z", - "avg_ns": 11724050159, - "stddev_ns": 377929624, - "avg_ts": 43.700648, - "stddev_ts": 1.383546, - "samples_ns": [ - 11532554627, - 12159397538, - 11480198312 - ], - "samples_ts": [ - 44.3961, - 42.1073, - 44.5985 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 67 - }, - { - "timestamp_utc": "2025-12-08T20:49:32.072868+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:49:20Z\",\n \"avg_ns\": 772760189,\n \"stddev_ns\": 330591825,\n \"avg_ts\": 183.605277,\n \"stddev_ts\": 62.991350,\n \"samples_ns\": [ 583177232, 580611465, 1154491872 ],\n \"samples_ts\": [ 219.487, 220.457, 110.871 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:49:23Z\",\n \"avg_ns\": 2812751928,\n \"stddev_ns\": 9314500,\n \"avg_ts\": 45.507368,\n \"stddev_ts\": 0.150959,\n \"samples_ns\": [ 2816829715, 2819331796, 2802094275 ],\n \"samples_ts\": [ 45.4412, 45.4008, 45.6801 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:49:20Z", - "avg_ns": 772760189, - "stddev_ns": 330591825, - "avg_ts": 183.605277, - "stddev_ts": 62.99135, - "samples_ns": [ - 583177232, - 580611465, - 1154491872 - ], - "samples_ts": [ - 219.487, - 220.457, - 110.871 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:49:23Z", - "avg_ns": 2812751928, - "stddev_ns": 9314500, - "avg_ts": 45.507368, - "stddev_ts": 0.150959, - "samples_ns": [ - 2816829715, - 2819331796, - 2802094275 - ], - "samples_ts": [ - 45.4412, - 45.4008, - 45.6801 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 68 - }, - { - "timestamp_utc": "2025-12-08T20:50:09.542464+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:49:32Z\",\n \"avg_ns\": 577353556,\n \"stddev_ns\": 939982,\n \"avg_ts\": 221.701632,\n \"stddev_ts\": 0.360817,\n \"samples_ns\": [ 578232256, 577464959, 576363455 ],\n \"samples_ts\": [ 221.364, 221.658, 222.082 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:49:34Z\",\n \"avg_ns\": 11497163227,\n \"stddev_ns\": 19544097,\n \"avg_ts\": 44.532810,\n \"stddev_ts\": 0.075645,\n \"samples_ns\": [ 11491003737, 11481441530, 11519044416 ],\n \"samples_ts\": [ 44.5566, 44.5937, 44.4481 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:49:32Z", - "avg_ns": 577353556, - "stddev_ns": 939982, - "avg_ts": 221.701632, - "stddev_ts": 0.360817, - "samples_ns": [ - 578232256, - 577464959, - 576363455 - ], - "samples_ts": [ - 221.364, - 221.658, - 222.082 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:49:34Z", - "avg_ns": 11497163227, - "stddev_ns": 19544097, - "avg_ts": 44.53281, - "stddev_ts": 0.075645, - "samples_ns": [ - 11491003737, - 11481441530, - 11519044416 - ], - "samples_ts": [ - 44.5566, - 44.5937, - 44.4481 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 69 - }, - { - "timestamp_utc": "2025-12-08T20:50:28.428058+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:50:10Z\",\n \"avg_ns\": 2447581001,\n \"stddev_ns\": 13402510,\n \"avg_ts\": 209.190304,\n \"stddev_ts\": 1.143182,\n \"samples_ns\": [ 2462394748, 2444051895, 2436296362 ],\n \"samples_ts\": [ 207.928, 209.488, 210.155 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:50:19Z\",\n \"avg_ns\": 2804527873,\n \"stddev_ns\": 16877223,\n \"avg_ts\": 45.641581,\n \"stddev_ts\": 0.274259,\n \"samples_ns\": [ 2822650163, 2801673340, 2789260118 ],\n \"samples_ts\": [ 45.3475, 45.687, 45.8903 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:50:10Z", - "avg_ns": 2447581001, - "stddev_ns": 13402510, - "avg_ts": 209.190304, - "stddev_ts": 1.143182, - "samples_ns": [ - 2462394748, - 2444051895, - 2436296362 - ], - "samples_ts": [ - 207.928, - 209.488, - 210.155 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:50:19Z", - "avg_ns": 2804527873, - "stddev_ns": 16877223, - "avg_ts": 45.641581, - "stddev_ts": 0.274259, - "samples_ns": [ - 2822650163, - 2801673340, - 2789260118 - ], - "samples_ts": [ - 45.3475, - 45.687, - 45.8903 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 70 - }, - { - "timestamp_utc": "2025-12-08T20:51:14.048549+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:50:29Z\",\n \"avg_ns\": 2452497937,\n \"stddev_ns\": 7101791,\n \"avg_ts\": 208.767909,\n \"stddev_ts\": 0.605482,\n \"samples_ns\": [ 2455930571, 2457230909, 2444332333 ],\n \"samples_ts\": [ 208.475, 208.365, 209.464 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:50:38Z\",\n \"avg_ns\": 11707597773,\n \"stddev_ns\": 368851230,\n \"avg_ts\": 43.760725,\n \"stddev_ts\": 1.354067,\n \"samples_ns\": [ 12133506024, 11496328955, 11492958341 ],\n \"samples_ts\": [ 42.1972, 44.536, 44.549 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:50:29Z", - "avg_ns": 2452497937, - "stddev_ns": 7101791, - "avg_ts": 208.767909, - "stddev_ts": 0.605482, - "samples_ns": [ - 2455930571, - 2457230909, - 2444332333 - ], - "samples_ts": [ - 208.475, - 208.365, - 209.464 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_type": "gemma3 270M Q4_K - Medium", - "model_size": 246587904, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:50:38Z", - "avg_ns": 11707597773, - "stddev_ns": 368851230, - "avg_ts": 43.760725, - "stddev_ts": 1.354067, - "samples_ns": [ - 12133506024, - 11496328955, - 11492958341 - ], - "samples_ts": [ - 42.1972, - 44.536, - 44.549 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 71 - }, - { - "timestamp_utc": "2025-12-08T20:51:34.878168+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:51:15Z\",\n \"avg_ns\": 949565670,\n \"stddev_ns\": 5868505,\n \"avg_ts\": 134.801913,\n \"stddev_ts\": 0.835478,\n \"samples_ns\": [ 954118291, 951635933, 942942788 ],\n \"samples_ts\": [ 134.155, 134.505, 135.745 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:51:19Z\",\n \"avg_ns\": 5047061830,\n \"stddev_ns\": 40144183,\n \"avg_ts\": 25.362360,\n \"stddev_ts\": 0.201712,\n \"samples_ns\": [ 5046637398, 5087416547, 5007131545 ],\n \"samples_ts\": [ 25.3634, 25.1601, 25.5635 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:51:15Z", - "avg_ns": 949565670, - "stddev_ns": 5868505, - "avg_ts": 134.801913, - "stddev_ts": 0.835478, - "samples_ns": [ - 954118291, - 951635933, - 942942788 - ], - "samples_ts": [ - 134.155, - 134.505, - 135.745 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:51:19Z", - "avg_ns": 5047061830, - "stddev_ns": 40144183, - "avg_ts": 25.36236, - "stddev_ts": 0.201712, - "samples_ns": [ - 5046637398, - 5087416547, - 5007131545 - ], - "samples_ts": [ - 25.3634, - 25.1601, - 25.5635 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 72 - }, - { - "timestamp_utc": "2025-12-08T20:52:42.698345+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:51:35Z\",\n \"avg_ns\": 937518237,\n \"stddev_ns\": 1706502,\n \"avg_ts\": 136.530979,\n \"stddev_ts\": 0.248665,\n \"samples_ns\": [ 938784147, 935578233, 938192333 ],\n \"samples_ts\": [ 136.347, 136.814, 136.433 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:51:39Z\",\n \"avg_ns\": 21121412060,\n \"stddev_ns\": 129317472,\n \"avg_ts\": 24.241407,\n \"stddev_ts\": 0.147959,\n \"samples_ns\": [ 21026688986, 21268741908, 21068805286 ],\n \"samples_ts\": [ 24.35, 24.0729, 24.3013 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:51:35Z", - "avg_ns": 937518237, - "stddev_ns": 1706502, - "avg_ts": 136.530979, - "stddev_ts": 0.248665, - "samples_ns": [ - 938784147, - 935578233, - 938192333 - ], - "samples_ts": [ - 136.347, - 136.814, - 136.433 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:51:39Z", - "avg_ns": 21121412060, - "stddev_ns": 129317472, - "avg_ts": 24.241407, - "stddev_ts": 0.147959, - "samples_ns": [ - 21026688986, - 21268741908, - 21068805286 - ], - "samples_ts": [ - 24.35, - 24.0729, - 24.3013 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 73 - }, - { - "timestamp_utc": "2025-12-08T20:53:16.578670+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:52:43Z\",\n \"avg_ns\": 4036786721,\n \"stddev_ns\": 335581168,\n \"avg_ts\": 127.393847,\n \"stddev_ts\": 10.110099,\n \"samples_ns\": [ 3858162475, 4423898701, 3828298987 ],\n \"samples_ts\": [ 132.706, 115.735, 133.741 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:52:59Z\",\n \"avg_ns\": 5741269968,\n \"stddev_ns\": 367475333,\n \"avg_ts\": 22.353722,\n \"stddev_ts\": 1.382865,\n \"samples_ns\": [ 5571589069, 6162925523, 5489295312 ],\n \"samples_ts\": [ 22.9737, 20.7694, 23.3181 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:52:43Z", - "avg_ns": 4036786721, - "stddev_ns": 335581168, - "avg_ts": 127.393847, - "stddev_ts": 10.110099, - "samples_ns": [ - 3858162475, - 4423898701, - 3828298987 - ], - "samples_ts": [ - 132.706, - 115.735, - 133.741 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:52:59Z", - "avg_ns": 5741269968, - "stddev_ns": 367475333, - "avg_ts": 22.353722, - "stddev_ts": 1.382865, - "samples_ns": [ - 5571589069, - 6162925523, - 5489295312 - ], - "samples_ts": [ - 22.9737, - 20.7694, - 23.3181 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 74 - }, - { - "timestamp_utc": "2025-12-08T20:54:35.512318+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:53:17Z\",\n \"avg_ns\": 4049112748,\n \"stddev_ns\": 311689692,\n \"avg_ts\": 126.928165,\n \"stddev_ts\": 9.367831,\n \"samples_ns\": [ 3844862506, 4407873493, 3894602245 ],\n \"samples_ts\": [ 133.165, 116.156, 131.464 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:53:33Z\",\n \"avg_ns\": 20754675990,\n \"stddev_ns\": 361156303,\n \"avg_ts\": 24.674071,\n \"stddev_ts\": 0.425117,\n \"samples_ns\": [ 20532475533, 20560155455, 21171396984 ],\n \"samples_ts\": [ 24.9361, 24.9025, 24.1836 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:53:17Z", - "avg_ns": 4049112748, - "stddev_ns": 311689692, - "avg_ts": 126.928165, - "stddev_ts": 9.367831, - "samples_ns": [ - 3844862506, - 4407873493, - 3894602245 - ], - "samples_ts": [ - 133.165, - 116.156, - 131.464 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:53:33Z", - "avg_ns": 20754675990, - "stddev_ns": 361156303, - "avg_ts": 24.674071, - "stddev_ts": 0.425117, - "samples_ns": [ - 20532475533, - 20560155455, - 21171396984 - ], - "samples_ts": [ - 24.9361, - 24.9025, - 24.1836 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 75 - }, - { - "timestamp_utc": "2025-12-08T20:54:55.645747+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:54:36Z\",\n \"avg_ns\": 939074584,\n \"stddev_ns\": 3509001,\n \"avg_ts\": 136.305669,\n \"stddev_ts\": 0.508604,\n \"samples_ns\": [ 942956682, 938138094, 936128977 ],\n \"samples_ts\": [ 135.743, 136.44, 136.733 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:54:39Z\",\n \"avg_ns\": 5233488826,\n \"stddev_ns\": 341664885,\n \"avg_ts\": 24.525074,\n \"stddev_ts\": 1.544101,\n \"samples_ns\": [ 5058492235, 5627201162, 5014773081 ],\n \"samples_ts\": [ 25.304, 22.7467, 25.5246 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:54:36Z", - "avg_ns": 939074584, - "stddev_ns": 3509001, - "avg_ts": 136.305669, - "stddev_ts": 0.508604, - "samples_ns": [ - 942956682, - 938138094, - 936128977 - ], - "samples_ts": [ - 135.743, - 136.44, - 136.733 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:54:39Z", - "avg_ns": 5233488826, - "stddev_ns": 341664885, - "avg_ts": 24.525074, - "stddev_ts": 1.544101, - "samples_ns": [ - 5058492235, - 5627201162, - 5014773081 - ], - "samples_ts": [ - 25.304, - 22.7467, - 25.5246 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 76 - }, - { - "timestamp_utc": "2025-12-08T20:56:04.393979+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:54:56Z\",\n \"avg_ns\": 936073843,\n \"stddev_ns\": 5708455,\n \"avg_ts\": 136.744739,\n \"stddev_ts\": 0.833977,\n \"samples_ns\": [ 941763028, 936112190, 930346311 ],\n \"samples_ts\": [ 135.915, 136.736, 137.583 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:54:59Z\",\n \"avg_ns\": 21439203263,\n \"stddev_ns\": 494033660,\n \"avg_ts\": 23.889841,\n \"stddev_ts\": 0.543987,\n \"samples_ns\": [ 22003508402, 21084669626, 21229431762 ],\n \"samples_ts\": [ 23.269, 24.283, 24.1175 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:54:56Z", - "avg_ns": 936073843, - "stddev_ns": 5708455, - "avg_ts": 136.744739, - "stddev_ts": 0.833977, - "samples_ns": [ - 941763028, - 936112190, - 930346311 - ], - "samples_ts": [ - 135.915, - 136.736, - 137.583 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:54:59Z", - "avg_ns": 21439203263, - "stddev_ns": 494033660, - "avg_ts": 23.889841, - "stddev_ts": 0.543987, - "samples_ns": [ - 22003508402, - 21084669626, - 21229431762 - ], - "samples_ts": [ - 23.269, - 24.283, - 24.1175 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 77 - }, - { - "timestamp_utc": "2025-12-08T20:56:37.828201+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:56:04Z\",\n \"avg_ns\": 4074758095,\n \"stddev_ns\": 339892967,\n \"avg_ts\": 126.210186,\n \"stddev_ts\": 10.044241,\n \"samples_ns\": [ 3881809517, 3875250358, 4467214411 ],\n \"samples_ts\": [ 131.897, 132.12, 114.613 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:56:21Z\",\n \"avg_ns\": 5541686228,\n \"stddev_ns\": 42997197,\n \"avg_ts\": 23.098586,\n \"stddev_ts\": 0.178420,\n \"samples_ns\": [ 5591333682, 5516544355, 5517180648 ],\n \"samples_ts\": [ 22.8926, 23.2029, 23.2003 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:56:04Z", - "avg_ns": 4074758095, - "stddev_ns": 339892967, - "avg_ts": 126.210186, - "stddev_ts": 10.044241, - "samples_ns": [ - 3881809517, - 3875250358, - 4467214411 - ], - "samples_ts": [ - 131.897, - 132.12, - 114.613 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:56:21Z", - "avg_ns": 5541686228, - "stddev_ns": 42997197, - "avg_ts": 23.098586, - "stddev_ts": 0.17842, - "samples_ns": [ - 5591333682, - 5516544355, - 5517180648 - ], - "samples_ts": [ - 22.8926, - 23.2029, - 23.2003 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 78 - }, - { - "timestamp_utc": "2025-12-08T20:57:58.055698+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:56:38Z\",\n \"avg_ns\": 3871608686,\n \"stddev_ns\": 5961714,\n \"avg_ts\": 132.244979,\n \"stddev_ts\": 0.203734,\n \"samples_ns\": [ 3865007280, 3876597694, 3873221086 ],\n \"samples_ts\": [ 132.471, 132.075, 132.19 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:56:53Z\",\n \"avg_ns\": 21349013157,\n \"stddev_ns\": 229589628,\n \"avg_ts\": 23.984216,\n \"stddev_ts\": 0.257024,\n \"samples_ns\": [ 21600644895, 21295463855, 21150930723 ],\n \"samples_ts\": [ 23.703, 24.0427, 24.207 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:56:38Z", - "avg_ns": 3871608686, - "stddev_ns": 5961714, - "avg_ts": 132.244979, - "stddev_ts": 0.203734, - "samples_ns": [ - 3865007280, - 3876597694, - 3873221086 - ], - "samples_ts": [ - 132.471, - 132.075, - 132.19 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:56:53Z", - "avg_ns": 21349013157, - "stddev_ns": 229589628, - "avg_ts": 23.984216, - "stddev_ts": 0.257024, - "samples_ns": [ - 21600644895, - 21295463855, - 21150930723 - ], - "samples_ts": [ - 23.703, - 24.0427, - 24.207 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 79 - }, - { - "timestamp_utc": "2025-12-08T20:58:17.709744+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:57:58Z\",\n \"avg_ns\": 940164746,\n \"stddev_ns\": 3938494,\n \"avg_ts\": 136.147941,\n \"stddev_ts\": 0.569051,\n \"samples_ns\": [ 938408358, 937410156, 944675725 ],\n \"samples_ts\": [ 136.401, 136.546, 135.496 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:58:02Z\",\n \"avg_ns\": 5074993825,\n \"stddev_ns\": 24614643,\n \"avg_ts\": 25.222102,\n \"stddev_ts\": 0.122511,\n \"samples_ns\": [ 5096616138, 5080158502, 5048206837 ],\n \"samples_ts\": [ 25.1147, 25.1961, 25.3555 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:57:58Z", - "avg_ns": 940164746, - "stddev_ns": 3938494, - "avg_ts": 136.147941, - "stddev_ts": 0.569051, - "samples_ns": [ - 938408358, - 937410156, - 944675725 - ], - "samples_ts": [ - 136.401, - 136.546, - 135.496 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:58:02Z", - "avg_ns": 5074993825, - "stddev_ns": 24614643, - "avg_ts": 25.222102, - "stddev_ts": 0.122511, - "samples_ns": [ - 5096616138, - 5080158502, - 5048206837 - ], - "samples_ts": [ - 25.1147, - 25.1961, - 25.3555 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 80 - }, - { - "timestamp_utc": "2025-12-08T20:59:26.060461+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:58:18Z\",\n \"avg_ns\": 944505276,\n \"stddev_ns\": 9249198,\n \"avg_ts\": 135.529301,\n \"stddev_ts\": 1.320020,\n \"samples_ns\": [ 940046922, 955139213, 938329693 ],\n \"samples_ts\": [ 136.163, 134.012, 136.413 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:58:22Z\",\n \"avg_ns\": 21302864218,\n \"stddev_ns\": 705189542,\n \"avg_ts\": 24.051845,\n \"stddev_ts\": 0.793965,\n \"samples_ns\": [ 22030644393, 20622676323, 21255271938 ],\n \"samples_ts\": [ 23.2404, 24.827, 24.0881 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:58:18Z", - "avg_ns": 944505276, - "stddev_ns": 9249198, - "avg_ts": 135.529301, - "stddev_ts": 1.32002, - "samples_ns": [ - 940046922, - 955139213, - 938329693 - ], - "samples_ts": [ - 136.163, - 134.012, - 136.413 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T20:58:22Z", - "avg_ns": 21302864218, - "stddev_ns": 705189542, - "avg_ts": 24.051845, - "stddev_ts": 0.793965, - "samples_ns": [ - 22030644393, - 20622676323, - 21255271938 - ], - "samples_ts": [ - 23.2404, - 24.827, - 24.0881 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 81 - }, - { - "timestamp_utc": "2025-12-08T20:59:58.618944+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:59:26Z\",\n \"avg_ns\": 4064517968,\n \"stddev_ns\": 2450842,\n \"avg_ts\": 125.968228,\n \"stddev_ts\": 0.075980,\n \"samples_ns\": [ 4066302184, 4065528243, 4061723477 ],\n \"samples_ts\": [ 125.913, 125.937, 126.055 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:59:43Z\",\n \"avg_ns\": 5002784315,\n \"stddev_ns\": 11921176,\n \"avg_ts\": 25.585849,\n \"stddev_ts\": 0.061016,\n \"samples_ns\": [ 4989745428, 5005482107, 5013125410 ],\n \"samples_ts\": [ 25.6526, 25.572, 25.533 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:59:26Z", - "avg_ns": 4064517968, - "stddev_ns": 2450842, - "avg_ts": 125.968228, - "stddev_ts": 0.07598, - "samples_ns": [ - 4066302184, - 4065528243, - 4061723477 - ], - "samples_ts": [ - 125.913, - 125.937, - 126.055 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T20:59:43Z", - "avg_ns": 5002784315, - "stddev_ns": 11921176, - "avg_ts": 25.585849, - "stddev_ts": 0.061016, - "samples_ns": [ - 4989745428, - 5005482107, - 5013125410 - ], - "samples_ts": [ - 25.6526, - 25.572, - 25.533 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 82 - }, - { - "timestamp_utc": "2025-12-08T21:01:18.655894+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:59:59Z\",\n \"avg_ns\": 4105830083,\n \"stddev_ns\": 10545991,\n \"avg_ts\": 124.701276,\n \"stddev_ts\": 0.320294,\n \"samples_ns\": [ 4116355510, 4095264421, 4105870320 ],\n \"samples_ts\": [ 124.382, 125.022, 124.7 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:00:16Z\",\n \"avg_ns\": 20796082220,\n \"stddev_ns\": 346257369,\n \"avg_ts\": 24.624546,\n \"stddev_ts\": 0.407613,\n \"samples_ns\": [ 21177904428, 20502443237, 20707898995 ],\n \"samples_ts\": [ 24.1761, 24.9726, 24.7249 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T20:59:59Z", - "avg_ns": 4105830083, - "stddev_ns": 10545991, - "avg_ts": 124.701276, - "stddev_ts": 0.320294, - "samples_ns": [ - 4116355510, - 4095264421, - 4105870320 - ], - "samples_ts": [ - 124.382, - 125.022, - 124.7 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:00:16Z", - "avg_ns": 20796082220, - "stddev_ns": 346257369, - "avg_ts": 24.624546, - "stddev_ts": 0.407613, - "samples_ns": [ - 21177904428, - 20502443237, - 20707898995 - ], - "samples_ts": [ - 24.1761, - 24.9726, - 24.7249 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 83 - }, - { - "timestamp_utc": "2025-12-08T21:01:38.242093+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:01:19Z\",\n \"avg_ns\": 941368996,\n \"stddev_ns\": 3741737,\n \"avg_ts\": 135.973621,\n \"stddev_ts\": 0.541690,\n \"samples_ns\": [ 943552561, 943505788, 937048640 ],\n \"samples_ts\": [ 135.658, 135.664, 136.599 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:01:23Z\",\n \"avg_ns\": 5048955937,\n \"stddev_ns\": 63278551,\n \"avg_ts\": 25.354435,\n \"stddev_ts\": 0.318203,\n \"samples_ns\": [ 5109933112, 5053331560, 4983603139 ],\n \"samples_ts\": [ 25.0493, 25.3298, 25.6842 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:01:19Z", - "avg_ns": 941368996, - "stddev_ns": 3741737, - "avg_ts": 135.973621, - "stddev_ts": 0.54169, - "samples_ns": [ - 943552561, - 943505788, - 937048640 - ], - "samples_ts": [ - 135.658, - 135.664, - 136.599 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:01:23Z", - "avg_ns": 5048955937, - "stddev_ns": 63278551, - "avg_ts": 25.354435, - "stddev_ts": 0.318203, - "samples_ns": [ - 5109933112, - 5053331560, - 4983603139 - ], - "samples_ts": [ - 25.0493, - 25.3298, - 25.6842 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 84 - }, - { - "timestamp_utc": "2025-12-08T21:02:45.516903+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:01:38Z\",\n \"avg_ns\": 941688542,\n \"stddev_ns\": 4311774,\n \"avg_ts\": 135.927947,\n \"stddev_ts\": 0.622828,\n \"samples_ns\": [ 942152454, 945749493, 937163680 ],\n \"samples_ts\": [ 135.859, 135.342, 136.582 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:01:42Z\",\n \"avg_ns\": 20943797023,\n \"stddev_ns\": 383286896,\n \"avg_ts\": 24.451896,\n \"stddev_ts\": 0.452157,\n \"samples_ns\": [ 20502372439, 21192205836, 21136812794 ],\n \"samples_ts\": [ 24.9727, 24.1598, 24.2231 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:01:38Z", - "avg_ns": 941688542, - "stddev_ns": 4311774, - "avg_ts": 135.927947, - "stddev_ts": 0.622828, - "samples_ns": [ - 942152454, - 945749493, - 937163680 - ], - "samples_ts": [ - 135.859, - 135.342, - 136.582 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:01:42Z", - "avg_ns": 20943797023, - "stddev_ns": 383286896, - "avg_ts": 24.451896, - "stddev_ts": 0.452157, - "samples_ns": [ - 20502372439, - 21192205836, - 21136812794 - ], - "samples_ts": [ - 24.9727, - 24.1598, - 24.2231 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 85 - }, - { - "timestamp_utc": "2025-12-08T21:03:17.946959+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:02:46Z\",\n \"avg_ns\": 4043552787,\n \"stddev_ns\": 319627608,\n \"avg_ts\": 127.128031,\n \"stddev_ts\": 9.615667,\n \"samples_ns\": [ 3875010560, 3843470342, 4412177459 ],\n \"samples_ts\": [ 132.129, 133.213, 116.042 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:03:02Z\",\n \"avg_ns\": 5070984869,\n \"stddev_ns\": 32983189,\n \"avg_ts\": 25.242355,\n \"stddev_ts\": 0.163792,\n \"samples_ns\": [ 5062250023, 5107456148, 5043248438 ],\n \"samples_ts\": [ 25.2852, 25.0614, 25.3805 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:02:46Z", - "avg_ns": 4043552787, - "stddev_ns": 319627608, - "avg_ts": 127.128031, - "stddev_ts": 9.615667, - "samples_ns": [ - 3875010560, - 3843470342, - 4412177459 - ], - "samples_ts": [ - 132.129, - 133.213, - 116.042 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:03:02Z", - "avg_ns": 5070984869, - "stddev_ns": 32983189, - "avg_ts": 25.242355, - "stddev_ts": 0.163792, - "samples_ns": [ - 5062250023, - 5107456148, - 5043248438 - ], - "samples_ts": [ - 25.2852, - 25.0614, - 25.3805 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 86 - }, - { - "timestamp_utc": "2025-12-08T21:04:37.197747+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:03:18Z\",\n \"avg_ns\": 3865011001,\n \"stddev_ns\": 9189626,\n \"avg_ts\": 132.471015,\n \"stddev_ts\": 0.315365,\n \"samples_ns\": [ 3869129173, 3871421028, 3854482803 ],\n \"samples_ts\": [ 132.33, 132.251, 132.832 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:03:34Z\",\n \"avg_ns\": 21027781313,\n \"stddev_ns\": 336272958,\n \"avg_ts\": 24.352931,\n \"stddev_ts\": 0.393038,\n \"samples_ns\": [ 21204950147, 20639968115, 21238425679 ],\n \"samples_ts\": [ 24.1453, 24.8062, 24.1072 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:03:18Z", - "avg_ns": 3865011001, - "stddev_ns": 9189626, - "avg_ts": 132.471015, - "stddev_ts": 0.315365, - "samples_ns": [ - 3869129173, - 3871421028, - 3854482803 - ], - "samples_ts": [ - 132.33, - 132.251, - 132.832 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:03:34Z", - "avg_ns": 21027781313, - "stddev_ns": 336272958, - "avg_ts": 24.352931, - "stddev_ts": 0.393038, - "samples_ns": [ - 21204950147, - 20639968115, - 21238425679 - ], - "samples_ts": [ - 24.1453, - 24.8062, - 24.1072 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 87 - }, - { - "timestamp_utc": "2025-12-08T21:04:57.375930+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:04:37Z\",\n \"avg_ns\": 936090748,\n \"stddev_ns\": 4501283,\n \"avg_ts\": 136.740982,\n \"stddev_ts\": 0.655789,\n \"samples_ns\": [ 941264032, 933939308, 933068904 ],\n \"samples_ts\": [ 135.987, 137.054, 137.182 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:04:41Z\",\n \"avg_ns\": 5246605566,\n \"stddev_ns\": 349709578,\n \"avg_ts\": 24.466518,\n \"stddev_ts\": 1.570614,\n \"samples_ns\": [ 5650247711, 5054861368, 5034707621 ],\n \"samples_ts\": [ 22.6539, 25.3222, 25.4235 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:04:37Z", - "avg_ns": 936090748, - "stddev_ns": 4501283, - "avg_ts": 136.740982, - "stddev_ts": 0.655789, - "samples_ns": [ - 941264032, - 933939308, - 933068904 - ], - "samples_ts": [ - 135.987, - 137.054, - 137.182 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:04:41Z", - "avg_ns": 5246605566, - "stddev_ns": 349709578, - "avg_ts": 24.466518, - "stddev_ts": 1.570614, - "samples_ns": [ - 5650247711, - 5054861368, - 5034707621 - ], - "samples_ts": [ - 22.6539, - 25.3222, - 25.4235 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 88 - }, - { - "timestamp_utc": "2025-12-08T21:06:04.462174+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:04:57Z\",\n \"avg_ns\": 946384167,\n \"stddev_ns\": 2137943,\n \"avg_ts\": 135.252089,\n \"stddev_ts\": 0.305471,\n \"samples_ns\": [ 946232973, 944325834, 948593694 ],\n \"samples_ts\": [ 135.273, 135.546, 134.937 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:05:01Z\",\n \"avg_ns\": 20870020017,\n \"stddev_ns\": 715888907,\n \"avg_ts\": 24.551686,\n \"stddev_ts\": 0.825912,\n \"samples_ns\": [ 20481345472, 21696176853, 20432537728 ],\n \"samples_ts\": [ 24.9984, 23.5986, 25.0581 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:04:57Z", - "avg_ns": 946384167, - "stddev_ns": 2137943, - "avg_ts": 135.252089, - "stddev_ts": 0.305471, - "samples_ns": [ - 946232973, - 944325834, - 948593694 - ], - "samples_ts": [ - 135.273, - 135.546, - 134.937 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:05:01Z", - "avg_ns": 20870020017, - "stddev_ns": 715888907, - "avg_ts": 24.551686, - "stddev_ts": 0.825912, - "samples_ns": [ - 20481345472, - 21696176853, - 20432537728 - ], - "samples_ts": [ - 24.9984, - 23.5986, - 25.0581 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 89 - }, - { - "timestamp_utc": "2025-12-08T21:06:37.896953+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:06:05Z\",\n \"avg_ns\": 3871085409,\n \"stddev_ns\": 11083354,\n \"avg_ts\": 132.263368,\n \"stddev_ts\": 0.378409,\n \"samples_ns\": [ 3869181966, 3861077224, 3882997038 ],\n \"samples_ts\": [ 132.328, 132.605, 131.857 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:06:20Z\",\n \"avg_ns\": 5748698143,\n \"stddev_ns\": 367049222,\n \"avg_ts\": 22.324430,\n \"stddev_ts\": 1.374777,\n \"samples_ns\": [ 6172479594, 5542472015, 5531142821 ],\n \"samples_ts\": [ 20.7372, 23.0944, 23.1417 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:06:05Z", - "avg_ns": 3871085409, - "stddev_ns": 11083354, - "avg_ts": 132.263368, - "stddev_ts": 0.378409, - "samples_ns": [ - 3869181966, - 3861077224, - 3882997038 - ], - "samples_ts": [ - 132.328, - 132.605, - 131.857 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:06:20Z", - "avg_ns": 5748698143, - "stddev_ns": 367049222, - "avg_ts": 22.32443, - "stddev_ts": 1.374777, - "samples_ns": [ - 6172479594, - 5542472015, - 5531142821 - ], - "samples_ts": [ - 20.7372, - 23.0944, - 23.1417 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 90 - }, - { - "timestamp_utc": "2025-12-08T21:07:57.875608+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:06:38Z\",\n \"avg_ns\": 3868803540,\n \"stddev_ns\": 11260841,\n \"avg_ts\": 132.341405,\n \"stddev_ts\": 0.385722,\n \"samples_ns\": [ 3877557749, 3872752865, 3856100006 ],\n \"samples_ts\": [ 132.042, 132.206, 132.777 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:06:53Z\",\n \"avg_ns\": 21275817771,\n \"stddev_ns\": 278419372,\n \"avg_ts\": 24.067613,\n \"stddev_ts\": 0.313295,\n \"samples_ns\": [ 21586322641, 21048403226, 21192727447 ],\n \"samples_ts\": [ 23.7187, 24.3249, 24.1592 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:06:38Z", - "avg_ns": 3868803540, - "stddev_ns": 11260841, - "avg_ts": 132.341405, - "stddev_ts": 0.385722, - "samples_ns": [ - 3877557749, - 3872752865, - 3856100006 - ], - "samples_ts": [ - 132.042, - 132.206, - 132.777 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:06:53Z", - "avg_ns": 21275817771, - "stddev_ns": 278419372, - "avg_ts": 24.067613, - "stddev_ts": 0.313295, - "samples_ns": [ - 21586322641, - 21048403226, - 21192727447 - ], - "samples_ts": [ - 23.7187, - 24.3249, - 24.1592 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 91 - }, - { - "timestamp_utc": "2025-12-08T21:08:18.125189+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:07:58Z\",\n \"avg_ns\": 942008068,\n \"stddev_ns\": 12656871,\n \"avg_ts\": 135.896196,\n \"stddev_ts\": 1.814782,\n \"samples_ns\": [ 937608451, 932138203, 956277552 ],\n \"samples_ts\": [ 136.518, 137.319, 133.852 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:08:02Z\",\n \"avg_ns\": 5275862265,\n \"stddev_ns\": 313203311,\n \"avg_ts\": 24.316705,\n \"stddev_ts\": 1.396502,\n \"samples_ns\": [ 5113453003, 5077220998, 5636912796 ],\n \"samples_ts\": [ 25.032, 25.2106, 22.7075 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:07:58Z", - "avg_ns": 942008068, - "stddev_ns": 12656871, - "avg_ts": 135.896196, - "stddev_ts": 1.814782, - "samples_ns": [ - 937608451, - 932138203, - 956277552 - ], - "samples_ts": [ - 136.518, - 137.319, - 133.852 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:08:02Z", - "avg_ns": 5275862265, - "stddev_ns": 313203311, - "avg_ts": 24.316705, - "stddev_ts": 1.396502, - "samples_ns": [ - 5113453003, - 5077220998, - 5636912796 - ], - "samples_ts": [ - 25.032, - 25.2106, - 22.7075 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 92 - }, - { - "timestamp_utc": "2025-12-08T21:09:24.647713+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:08:18Z\",\n \"avg_ns\": 944172247,\n \"stddev_ns\": 5933926,\n \"avg_ts\": 135.572059,\n \"stddev_ts\": 0.853413,\n \"samples_ns\": [ 945204680, 937789852, 949522209 ],\n \"samples_ts\": [ 135.42, 136.491, 134.805 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:08:22Z\",\n \"avg_ns\": 20690106190,\n \"stddev_ns\": 66610883,\n \"avg_ts\": 24.746298,\n \"stddev_ts\": 0.079532,\n \"samples_ns\": [ 20660197305, 20643692382, 20766428885 ],\n \"samples_ts\": [ 24.782, 24.8018, 24.6552 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:08:18Z", - "avg_ns": 944172247, - "stddev_ns": 5933926, - "avg_ts": 135.572059, - "stddev_ts": 0.853413, - "samples_ns": [ - 945204680, - 937789852, - 949522209 - ], - "samples_ts": [ - 135.42, - 136.491, - 134.805 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:08:22Z", - "avg_ns": 20690106190, - "stddev_ns": 66610883, - "avg_ts": 24.746298, - "stddev_ts": 0.079532, - "samples_ns": [ - 20660197305, - 20643692382, - 20766428885 - ], - "samples_ts": [ - 24.782, - 24.8018, - 24.6552 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 93 - }, - { - "timestamp_utc": "2025-12-08T21:09:57.632271+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:09:25Z\",\n \"avg_ns\": 4095923212,\n \"stddev_ns\": 11655644,\n \"avg_ts\": 125.003017,\n \"stddev_ts\": 0.355290,\n \"samples_ns\": [ 4086441099, 4108935484, 4092393055 ],\n \"samples_ts\": [ 125.292, 124.606, 125.11 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:09:41Z\",\n \"avg_ns\": 5290620650,\n \"stddev_ns\": 316509173,\n \"avg_ts\": 24.249703,\n \"stddev_ts\": 1.402729,\n \"samples_ns\": [ 5094394722, 5121713773, 5655753455 ],\n \"samples_ts\": [ 25.1257, 24.9916, 22.6318 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:09:25Z", - "avg_ns": 4095923212, - "stddev_ns": 11655644, - "avg_ts": 125.003017, - "stddev_ts": 0.35529, - "samples_ns": [ - 4086441099, - 4108935484, - 4092393055 - ], - "samples_ts": [ - 125.292, - 124.606, - 125.11 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:09:41Z", - "avg_ns": 5290620650, - "stddev_ns": 316509173, - "avg_ts": 24.249703, - "stddev_ts": 1.402729, - "samples_ns": [ - 5094394722, - 5121713773, - 5655753455 - ], - "samples_ts": [ - 25.1257, - 24.9916, - 22.6318 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 94 - }, - { - "timestamp_utc": "2025-12-08T21:11:21.162684+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:09:58Z\",\n \"avg_ns\": 4089939466,\n \"stddev_ns\": 22315123,\n \"avg_ts\": 125.187708,\n \"stddev_ts\": 0.682186,\n \"samples_ns\": [ 4113801043, 4086430644, 4069586712 ],\n \"samples_ts\": [ 124.459, 125.293, 125.811 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:10:14Z\",\n \"avg_ns\": 22149645947,\n \"stddev_ns\": 737208094,\n \"avg_ts\": 23.132650,\n \"stddev_ts\": 0.773357,\n \"samples_ns\": [ 21385110462, 22207738011, 22856089369 ],\n \"samples_ts\": [ 23.9419, 23.055, 22.401 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:09:58Z", - "avg_ns": 4089939466, - "stddev_ns": 22315123, - "avg_ts": 125.187708, - "stddev_ts": 0.682186, - "samples_ns": [ - 4113801043, - 4086430644, - 4069586712 - ], - "samples_ts": [ - 124.459, - 125.293, - 125.811 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:10:14Z", - "avg_ns": 22149645947, - "stddev_ns": 737208094, - "avg_ts": 23.13265, - "stddev_ts": 0.773357, - "samples_ns": [ - 21385110462, - 22207738011, - 22856089369 - ], - "samples_ts": [ - 23.9419, - 23.055, - 22.401 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 95 - }, - { - "timestamp_utc": "2025-12-08T21:11:41.196300+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:11:21Z\",\n \"avg_ns\": 1127860336,\n \"stddev_ns\": 333812771,\n \"avg_ts\": 119.446765,\n \"stddev_ts\": 30.193282,\n \"samples_ns\": [ 1513314088, 935272071, 934994850 ],\n \"samples_ts\": [ 84.5826, 136.859, 136.899 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:11:26Z\",\n \"avg_ns\": 5011635034,\n \"stddev_ns\": 34538143,\n \"avg_ts\": 25.541373,\n \"stddev_ts\": 0.175351,\n \"samples_ns\": [ 4988540942, 5051340214, 4995023946 ],\n \"samples_ts\": [ 25.6588, 25.3398, 25.6255 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:11:21Z", - "avg_ns": 1127860336, - "stddev_ns": 333812771, - "avg_ts": 119.446765, - "stddev_ts": 30.193282, - "samples_ns": [ - 1513314088, - 935272071, - 934994850 - ], - "samples_ts": [ - 84.5826, - 136.859, - 136.899 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:11:26Z", - "avg_ns": 5011635034, - "stddev_ns": 34538143, - "avg_ts": 25.541373, - "stddev_ts": 0.175351, - "samples_ns": [ - 4988540942, - 5051340214, - 4995023946 - ], - "samples_ts": [ - 25.6588, - 25.3398, - 25.6255 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 96 - }, - { - "timestamp_utc": "2025-12-08T21:12:49.204924+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:11:41Z\",\n \"avg_ns\": 948124724,\n \"stddev_ns\": 10886835,\n \"avg_ts\": 135.015132,\n \"stddev_ts\": 1.541057,\n \"samples_ns\": [ 943483482, 940327765, 960562927 ],\n \"samples_ts\": [ 135.667, 136.123, 133.255 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:11:45Z\",\n \"avg_ns\": 21175747637,\n \"stddev_ns\": 514009500,\n \"avg_ts\": 24.188043,\n \"stddev_ts\": 0.583366,\n \"samples_ns\": [ 20717684515, 21077916265, 21731642132 ],\n \"samples_ts\": [ 24.7132, 24.2908, 23.5601 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:11:41Z", - "avg_ns": 948124724, - "stddev_ns": 10886835, - "avg_ts": 135.015132, - "stddev_ts": 1.541057, - "samples_ns": [ - 943483482, - 940327765, - 960562927 - ], - "samples_ts": [ - 135.667, - 136.123, - 133.255 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:11:45Z", - "avg_ns": 21175747637, - "stddev_ns": 514009500, - "avg_ts": 24.188043, - "stddev_ts": 0.583366, - "samples_ns": [ - 20717684515, - 21077916265, - 21731642132 - ], - "samples_ts": [ - 24.7132, - 24.2908, - 23.5601 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 97 - }, - { - "timestamp_utc": "2025-12-08T21:13:21.535865+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:12:49Z\",\n \"avg_ns\": 4047638621,\n \"stddev_ns\": 338719105,\n \"avg_ts\": 127.059758,\n \"stddev_ts\": 10.150296,\n \"samples_ns\": [ 3833472438, 3871295782, 4438147645 ],\n \"samples_ts\": [ 133.56, 132.255, 115.363 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:13:06Z\",\n \"avg_ns\": 5025835505,\n \"stddev_ns\": 20051361,\n \"avg_ts\": 25.468673,\n \"stddev_ts\": 0.101798,\n \"samples_ns\": [ 5041393520, 5032906017, 5003206980 ],\n \"samples_ts\": [ 25.3898, 25.4326, 25.5836 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:12:49Z", - "avg_ns": 4047638621, - "stddev_ns": 338719105, - "avg_ts": 127.059758, - "stddev_ts": 10.150296, - "samples_ns": [ - 3833472438, - 3871295782, - 4438147645 - ], - "samples_ts": [ - 133.56, - 132.255, - 115.363 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:13:06Z", - "avg_ns": 5025835505, - "stddev_ns": 20051361, - "avg_ts": 25.468673, - "stddev_ts": 0.101798, - "samples_ns": [ - 5041393520, - 5032906017, - 5003206980 - ], - "samples_ts": [ - 25.3898, - 25.4326, - 25.5836 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 98 - }, - { - "timestamp_utc": "2025-12-08T21:14:41.646190+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:13:22Z\",\n \"avg_ns\": 3851442657,\n \"stddev_ns\": 6608996,\n \"avg_ts\": 132.937460,\n \"stddev_ts\": 0.228211,\n \"samples_ns\": [ 3857285461, 3844270683, 3852771829 ],\n \"samples_ts\": [ 132.736, 133.185, 132.891 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:13:37Z\",\n \"avg_ns\": 21336159460,\n \"stddev_ns\": 618771945,\n \"avg_ts\": 24.010245,\n \"stddev_ts\": 0.694402,\n \"samples_ns\": [ 21976377850, 20741333992, 21290766538 ],\n \"samples_ts\": [ 23.2977, 24.685, 24.048 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:13:22Z", - "avg_ns": 3851442657, - "stddev_ns": 6608996, - "avg_ts": 132.93746, - "stddev_ts": 0.228211, - "samples_ns": [ - 3857285461, - 3844270683, - 3852771829 - ], - "samples_ts": [ - 132.736, - 133.185, - 132.891 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:13:37Z", - "avg_ns": 21336159460, - "stddev_ns": 618771945, - "avg_ts": 24.010245, - "stddev_ts": 0.694402, - "samples_ns": [ - 21976377850, - 20741333992, - 21290766538 - ], - "samples_ts": [ - 23.2977, - 24.685, - 24.048 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 99 - }, - { - "timestamp_utc": "2025-12-08T21:15:01.134167+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:14:42Z\",\n \"avg_ns\": 943684958,\n \"stddev_ns\": 7370455,\n \"avg_ts\": 135.644025,\n \"stddev_ts\": 1.063571,\n \"samples_ns\": [ 946595248, 949155875, 935303752 ],\n \"samples_ts\": [ 135.221, 134.857, 136.854 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:14:46Z\",\n \"avg_ns\": 5010757896,\n \"stddev_ns\": 28678575,\n \"avg_ts\": 25.545594,\n \"stddev_ts\": 0.145738,\n \"samples_ns\": [ 4992101166, 4996392370, 5043780153 ],\n \"samples_ts\": [ 25.6405, 25.6185, 25.3778 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:14:42Z", - "avg_ns": 943684958, - "stddev_ns": 7370455, - "avg_ts": 135.644025, - "stddev_ts": 1.063571, - "samples_ns": [ - 946595248, - 949155875, - 935303752 - ], - "samples_ts": [ - 135.221, - 134.857, - 136.854 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:14:46Z", - "avg_ns": 5010757896, - "stddev_ns": 28678575, - "avg_ts": 25.545594, - "stddev_ts": 0.145738, - "samples_ns": [ - 4992101166, - 4996392370, - 5043780153 - ], - "samples_ts": [ - 25.6405, - 25.6185, - 25.3778 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 100 - }, - { - "timestamp_utc": "2025-12-08T21:16:08.638051+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:15:01Z\",\n \"avg_ns\": 936345960,\n \"stddev_ns\": 3958392,\n \"avg_ts\": 136.703236,\n \"stddev_ts\": 0.577027,\n \"samples_ns\": [ 940710249, 932987944, 935339688 ],\n \"samples_ts\": [ 136.067, 137.194, 136.849 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:15:05Z\",\n \"avg_ns\": 21015437618,\n \"stddev_ns\": 369781356,\n \"avg_ts\": 24.368098,\n \"stddev_ts\": 0.431000,\n \"samples_ns\": [ 21342830399, 21089120009, 20614362448 ],\n \"samples_ts\": [ 23.9893, 24.2779, 24.8371 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:15:01Z", - "avg_ns": 936345960, - "stddev_ns": 3958392, - "avg_ts": 136.703236, - "stddev_ts": 0.577027, - "samples_ns": [ - 940710249, - 932987944, - 935339688 - ], - "samples_ts": [ - 136.067, - 137.194, - 136.849 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:15:05Z", - "avg_ns": 21015437618, - "stddev_ns": 369781356, - "avg_ts": 24.368098, - "stddev_ts": 0.431, - "samples_ns": [ - 21342830399, - 21089120009, - 20614362448 - ], - "samples_ts": [ - 23.9893, - 24.2779, - 24.8371 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 101 - }, - { - "timestamp_utc": "2025-12-08T21:16:42.097298+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:16:09Z\",\n \"avg_ns\": 3889577984,\n \"stddev_ns\": 17593588,\n \"avg_ts\": 131.635609,\n \"stddev_ts\": 0.594031,\n \"samples_ns\": [ 3876874648, 3882200002, 3909659302 ],\n \"samples_ts\": [ 132.065, 131.884, 130.958 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:16:24Z\",\n \"avg_ns\": 5742551792,\n \"stddev_ns\": 350472508,\n \"avg_ts\": 22.343358,\n \"stddev_ts\": 1.317771,\n \"samples_ns\": [ 6146743734, 5523059420, 5557852222 ],\n \"samples_ts\": [ 20.824, 23.1756, 23.0305 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:16:09Z", - "avg_ns": 3889577984, - "stddev_ns": 17593588, - "avg_ts": 131.635609, - "stddev_ts": 0.594031, - "samples_ns": [ - 3876874648, - 3882200002, - 3909659302 - ], - "samples_ts": [ - 132.065, - 131.884, - 130.958 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:16:24Z", - "avg_ns": 5742551792, - "stddev_ns": 350472508, - "avg_ts": 22.343358, - "stddev_ts": 1.317771, - "samples_ns": [ - 6146743734, - 5523059420, - 5557852222 - ], - "samples_ts": [ - 20.824, - 23.1756, - 23.0305 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 102 - }, - { - "timestamp_utc": "2025-12-08T21:18:04.953899+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:16:42Z\",\n \"avg_ns\": 3818857957,\n \"stddev_ns\": 9597907,\n \"avg_ts\": 134.072060,\n \"stddev_ts\": 0.336674,\n \"samples_ns\": [ 3829383192, 3810590247, 3816600433 ],\n \"samples_ts\": [ 133.703, 134.362, 134.151 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:16:57Z\",\n \"avg_ns\": 22293976240,\n \"stddev_ns\": 168665433,\n \"avg_ts\": 22.966725,\n \"stddev_ts\": 0.174483,\n \"samples_ns\": [ 22100198823, 22373961753, 22407768146 ],\n \"samples_ts\": [ 23.1672, 22.8837, 22.8492 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:16:42Z", - "avg_ns": 3818857957, - "stddev_ns": 9597907, - "avg_ts": 134.07206, - "stddev_ts": 0.336674, - "samples_ns": [ - 3829383192, - 3810590247, - 3816600433 - ], - "samples_ts": [ - 133.703, - 134.362, - 134.151 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:16:57Z", - "avg_ns": 22293976240, - "stddev_ns": 168665433, - "avg_ts": 22.966725, - "stddev_ts": 0.174483, - "samples_ns": [ - 22100198823, - 22373961753, - 22407768146 - ], - "samples_ts": [ - 23.1672, - 22.8837, - 22.8492 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 103 - }, - { - "timestamp_utc": "2025-12-08T21:18:24.400432+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:18:05Z\",\n \"avg_ns\": 938249947,\n \"stddev_ns\": 838822,\n \"avg_ts\": 136.424274,\n \"stddev_ts\": 0.121773,\n \"samples_ns\": [ 937510965, 939159972, 938078906 ],\n \"samples_ts\": [ 136.532, 136.292, 136.449 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:18:09Z\",\n \"avg_ns\": 5000774849,\n \"stddev_ns\": 19593757,\n \"avg_ts\": 25.596296,\n \"stddev_ts\": 0.100351,\n \"samples_ns\": [ 4980231064, 5019254970, 5002838514 ],\n \"samples_ts\": [ 25.7016, 25.5018, 25.5855 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:18:05Z", - "avg_ns": 938249947, - "stddev_ns": 838822, - "avg_ts": 136.424274, - "stddev_ts": 0.121773, - "samples_ns": [ - 937510965, - 939159972, - 938078906 - ], - "samples_ts": [ - 136.532, - 136.292, - 136.449 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:18:09Z", - "avg_ns": 5000774849, - "stddev_ns": 19593757, - "avg_ts": 25.596296, - "stddev_ts": 0.100351, - "samples_ns": [ - 4980231064, - 5019254970, - 5002838514 - ], - "samples_ts": [ - 25.7016, - 25.5018, - 25.5855 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 104 - }, - { - "timestamp_utc": "2025-12-08T21:19:32.381981+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:18:24Z\",\n \"avg_ns\": 947025103,\n \"stddev_ns\": 1605094,\n \"avg_ts\": 135.160351,\n \"stddev_ts\": 0.229219,\n \"samples_ns\": [ 948011589, 947890010, 945173712 ],\n \"samples_ts\": [ 135.019, 135.037, 135.425 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:18:28Z\",\n \"avg_ns\": 21176692457,\n \"stddev_ns\": 21323937,\n \"avg_ts\": 24.177541,\n \"stddev_ts\": 0.024350,\n \"samples_ns\": [ 21179521069, 21196460916, 21154095386 ],\n \"samples_ts\": [ 24.1743, 24.155, 24.2034 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:18:24Z", - "avg_ns": 947025103, - "stddev_ns": 1605094, - "avg_ts": 135.160351, - "stddev_ts": 0.229219, - "samples_ns": [ - 948011589, - 947890010, - 945173712 - ], - "samples_ts": [ - 135.019, - 135.037, - 135.425 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:18:28Z", - "avg_ns": 21176692457, - "stddev_ns": 21323937, - "avg_ts": 24.177541, - "stddev_ts": 0.02435, - "samples_ns": [ - 21179521069, - 21196460916, - 21154095386 - ], - "samples_ts": [ - 24.1743, - 24.155, - 24.2034 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 105 - }, - { - "timestamp_utc": "2025-12-08T21:20:06.009631+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:19:32Z\",\n \"avg_ns\": 4084998986,\n \"stddev_ns\": 18549709,\n \"avg_ts\": 125.338347,\n \"stddev_ts\": 0.567816,\n \"samples_ns\": [ 4077236694, 4106168792, 4071591473 ],\n \"samples_ts\": [ 125.575, 124.69, 125.749 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:19:49Z\",\n \"avg_ns\": 5522688644,\n \"stddev_ns\": 23988943,\n \"avg_ts\": 23.177408,\n \"stddev_ts\": 0.100560,\n \"samples_ns\": [ 5518272305, 5501214841, 5548578787 ],\n \"samples_ts\": [ 23.1957, 23.2676, 23.069 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:19:32Z", - "avg_ns": 4084998986, - "stddev_ns": 18549709, - "avg_ts": 125.338347, - "stddev_ts": 0.567816, - "samples_ns": [ - 4077236694, - 4106168792, - 4071591473 - ], - "samples_ts": [ - 125.575, - 124.69, - 125.749 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:19:49Z", - "avg_ns": 5522688644, - "stddev_ns": 23988943, - "avg_ts": 23.177408, - "stddev_ts": 0.10056, - "samples_ns": [ - 5518272305, - 5501214841, - 5548578787 - ], - "samples_ts": [ - 23.1957, - 23.2676, - 23.069 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 106 - }, - { - "timestamp_utc": "2025-12-08T21:21:25.360522+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:20:06Z\",\n \"avg_ns\": 4085637975,\n \"stddev_ns\": 18282239,\n \"avg_ts\": 125.318699,\n \"stddev_ts\": 0.560828,\n \"samples_ns\": [ 4067215350, 4103776061, 4085922516 ],\n \"samples_ts\": [ 125.885, 124.763, 125.308 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:20:23Z\",\n \"avg_ns\": 20567248973,\n \"stddev_ns\": 94968301,\n \"avg_ts\": 24.894301,\n \"stddev_ts\": 0.115252,\n \"samples_ns\": [ 20617319843, 20626704125, 20457722951 ],\n \"samples_ts\": [ 24.8335, 24.8222, 25.0272 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:20:06Z", - "avg_ns": 4085637975, - "stddev_ns": 18282239, - "avg_ts": 125.318699, - "stddev_ts": 0.560828, - "samples_ns": [ - 4067215350, - 4103776061, - 4085922516 - ], - "samples_ts": [ - 125.885, - 124.763, - 125.308 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:20:23Z", - "avg_ns": 20567248973, - "stddev_ns": 94968301, - "avg_ts": 24.894301, - "stddev_ts": 0.115252, - "samples_ns": [ - 20617319843, - 20626704125, - 20457722951 - ], - "samples_ts": [ - 24.8335, - 24.8222, - 25.0272 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 107 - }, - { - "timestamp_utc": "2025-12-08T21:21:37.072210+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:21:25Z\",\n \"avg_ns\": 495314874,\n \"stddev_ns\": 2683575,\n \"avg_ts\": 258.426529,\n \"stddev_ts\": 1.399705,\n \"samples_ns\": [ 492674157, 495231473, 498038994 ],\n \"samples_ts\": [ 259.807, 258.465, 257.008 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:21:28Z\",\n \"avg_ns\": 2836733521,\n \"stddev_ns\": 11784093,\n \"avg_ts\": 45.122839,\n \"stddev_ts\": 0.187208,\n \"samples_ns\": [ 2834258568, 2826383715, 2849558282 ],\n \"samples_ts\": [ 45.1617, 45.2876, 44.9192 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:21:25Z", - "avg_ns": 495314874, - "stddev_ns": 2683575, - "avg_ts": 258.426529, - "stddev_ts": 1.399705, - "samples_ns": [ - 492674157, - 495231473, - 498038994 - ], - "samples_ts": [ - 259.807, - 258.465, - 257.008 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:21:28Z", - "avg_ns": 2836733521, - "stddev_ns": 11784093, - "avg_ts": 45.122839, - "stddev_ts": 0.187208, - "samples_ns": [ - 2834258568, - 2826383715, - 2849558282 - ], - "samples_ts": [ - 45.1617, - 45.2876, - 44.9192 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 108 - }, - { - "timestamp_utc": "2025-12-08T21:22:15.298814+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:21:37Z\",\n \"avg_ns\": 492395988,\n \"stddev_ns\": 722187,\n \"avg_ts\": 259.953750,\n \"stddev_ts\": 0.381023,\n \"samples_ns\": [ 493206910, 492158926, 491822128 ],\n \"samples_ts\": [ 259.526, 260.079, 260.257 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:21:39Z\",\n \"avg_ns\": 11865142367,\n \"stddev_ns\": 306841704,\n \"avg_ts\": 43.170575,\n \"stddev_ts\": 1.100001,\n \"samples_ns\": [ 12219452271, 11687553161, 11688421670 ],\n \"samples_ts\": [ 41.9004, 43.8073, 43.804 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:21:37Z", - "avg_ns": 492395988, - "stddev_ns": 722187, - "avg_ts": 259.95375, - "stddev_ts": 0.381023, - "samples_ns": [ - 493206910, - 492158926, - 491822128 - ], - "samples_ts": [ - 259.526, - 260.079, - 260.257 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:21:39Z", - "avg_ns": 11865142367, - "stddev_ns": 306841704, - "avg_ts": 43.170575, - "stddev_ts": 1.100001, - "samples_ns": [ - 12219452271, - 11687553161, - 11688421670 - ], - "samples_ts": [ - 41.9004, - 43.8073, - 43.804 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 109 - }, - { - "timestamp_utc": "2025-12-08T21:22:32.544433+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:22:15Z\",\n \"avg_ns\": 2024431929,\n \"stddev_ns\": 5725351,\n \"avg_ts\": 252.911801,\n \"stddev_ts\": 0.714157,\n \"samples_ns\": [ 2021734931, 2031007693, 2020553163 ],\n \"samples_ts\": [ 253.248, 252.092, 253.396 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:22:23Z\",\n \"avg_ns\": 2833007730,\n \"stddev_ns\": 13046410,\n \"avg_ts\": 45.182303,\n \"stddev_ts\": 0.208560,\n \"samples_ns\": [ 2842546357, 2818140549, 2838336285 ],\n \"samples_ts\": [ 45.03, 45.42, 45.0968 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:22:15Z", - "avg_ns": 2024431929, - "stddev_ns": 5725351, - "avg_ts": 252.911801, - "stddev_ts": 0.714157, - "samples_ns": [ - 2021734931, - 2031007693, - 2020553163 - ], - "samples_ts": [ - 253.248, - 252.092, - 253.396 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:22:23Z", - "avg_ns": 2833007730, - "stddev_ns": 13046410, - "avg_ts": 45.182303, - "stddev_ts": 0.20856, - "samples_ns": [ - 2842546357, - 2818140549, - 2838336285 - ], - "samples_ts": [ - 45.03, - 45.42, - 45.0968 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 110 - }, - { - "timestamp_utc": "2025-12-08T21:23:17.411338+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:22:33Z\",\n \"avg_ns\": 2011743305,\n \"stddev_ns\": 4059260,\n \"avg_ts\": 254.506322,\n \"stddev_ts\": 0.513481,\n \"samples_ns\": [ 2015877010, 2011590077, 2007762828 ],\n \"samples_ts\": [ 253.984, 254.525, 255.01 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:22:41Z\",\n \"avg_ns\": 11843177768,\n \"stddev_ns\": 405065724,\n \"avg_ts\": 43.264728,\n \"stddev_ts\": 1.451129,\n \"samples_ns\": [ 11603944065, 11614723265, 12310865974 ],\n \"samples_ts\": [ 44.1229, 44.082, 41.5893 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:22:33Z", - "avg_ns": 2011743305, - "stddev_ns": 4059260, - "avg_ts": 254.506322, - "stddev_ts": 0.513481, - "samples_ns": [ - 2015877010, - 2011590077, - 2007762828 - ], - "samples_ts": [ - 253.984, - 254.525, - 255.01 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:22:41Z", - "avg_ns": 11843177768, - "stddev_ns": 405065724, - "avg_ts": 43.264728, - "stddev_ts": 1.451129, - "samples_ns": [ - 11603944065, - 11614723265, - 12310865974 - ], - "samples_ts": [ - 44.1229, - 44.082, - 41.5893 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 111 - }, - { - "timestamp_utc": "2025-12-08T21:23:28.574779+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:23:17Z\",\n \"avg_ns\": 495076596,\n \"stddev_ns\": 1168462,\n \"avg_ts\": 258.546811,\n \"stddev_ts\": 0.610045,\n \"samples_ns\": [ 496230470, 495104429, 493894891 ],\n \"samples_ts\": [ 257.945, 258.531, 259.164 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:23:19Z\",\n \"avg_ns\": 2841229965,\n \"stddev_ns\": 9909782,\n \"avg_ts\": 45.051277,\n \"stddev_ts\": 0.156953,\n \"samples_ns\": [ 2852074057, 2838971020, 2832644819 ],\n \"samples_ts\": [ 44.8796, 45.0868, 45.1875 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:23:17Z", - "avg_ns": 495076596, - "stddev_ns": 1168462, - "avg_ts": 258.546811, - "stddev_ts": 0.610045, - "samples_ns": [ - 496230470, - 495104429, - 493894891 - ], - "samples_ts": [ - 257.945, - 258.531, - 259.164 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:23:19Z", - "avg_ns": 2841229965, - "stddev_ns": 9909782, - "avg_ts": 45.051277, - "stddev_ts": 0.156953, - "samples_ns": [ - 2852074057, - 2838971020, - 2832644819 - ], - "samples_ts": [ - 44.8796, - 45.0868, - 45.1875 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 112 - }, - { - "timestamp_utc": "2025-12-08T21:24:06.865143+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:23:29Z\",\n \"avg_ns\": 499323311,\n \"stddev_ns\": 1636162,\n \"avg_ts\": 256.348771,\n \"stddev_ts\": 0.841008,\n \"samples_ns\": [ 499753613, 500701320, 497515000 ],\n \"samples_ts\": [ 256.126, 255.641, 257.279 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:23:31Z\",\n \"avg_ns\": 11874955613,\n \"stddev_ns\": 347376318,\n \"avg_ts\": 43.140156,\n \"stddev_ts\": 1.241160,\n \"samples_ns\": [ 12275768780, 11661062048, 11688036012 ],\n \"samples_ts\": [ 41.7082, 43.9068, 43.8055 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:23:29Z", - "avg_ns": 499323311, - "stddev_ns": 1636162, - "avg_ts": 256.348771, - "stddev_ts": 0.841008, - "samples_ns": [ - 499753613, - 500701320, - 497515000 - ], - "samples_ts": [ - 256.126, - 255.641, - 257.279 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:23:31Z", - "avg_ns": 11874955613, - "stddev_ns": 347376318, - "avg_ts": 43.140156, - "stddev_ts": 1.24116, - "samples_ns": [ - 12275768780, - 11661062048, - 11688036012 - ], - "samples_ts": [ - 41.7082, - 43.9068, - 43.8055 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 113 - }, - { - "timestamp_utc": "2025-12-08T21:24:24.192435+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:24:07Z\",\n \"avg_ns\": 2023228574,\n \"stddev_ns\": 7603549,\n \"avg_ts\": 253.063257,\n \"stddev_ts\": 0.949493,\n \"samples_ns\": [ 2031759171, 2017164483, 2020762068 ],\n \"samples_ts\": [ 251.998, 253.822, 253.37 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:24:15Z\",\n \"avg_ns\": 2856979782,\n \"stddev_ns\": 8138557,\n \"avg_ts\": 44.802799,\n \"stddev_ts\": 0.127421,\n \"samples_ns\": [ 2866349202, 2852922560, 2851667585 ],\n \"samples_ts\": [ 44.6561, 44.8663, 44.886 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:24:07Z", - "avg_ns": 2023228574, - "stddev_ns": 7603549, - "avg_ts": 253.063257, - "stddev_ts": 0.949493, - "samples_ns": [ - 2031759171, - 2017164483, - 2020762068 - ], - "samples_ts": [ - 251.998, - 253.822, - 253.37 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:24:15Z", - "avg_ns": 2856979782, - "stddev_ns": 8138557, - "avg_ts": 44.802799, - "stddev_ts": 0.127421, - "samples_ns": [ - 2866349202, - 2852922560, - 2851667585 - ], - "samples_ts": [ - 44.6561, - 44.8663, - 44.886 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 114 - }, - { - "timestamp_utc": "2025-12-08T21:25:10.313975+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:24:24Z\",\n \"avg_ns\": 2011233988,\n \"stddev_ns\": 9929441,\n \"avg_ts\": 254.574215,\n \"stddev_ts\": 1.255992,\n \"samples_ns\": [ 2010343773, 2001779833, 2021578360 ],\n \"samples_ts\": [ 254.683, 255.772, 253.267 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:24:33Z\",\n \"avg_ns\": 12270921518,\n \"stddev_ns\": 485567999,\n \"avg_ts\": 41.768521,\n \"stddev_ts\": 1.663129,\n \"samples_ns\": [ 12732317404, 12316106991, 11764340159 ],\n \"samples_ts\": [ 40.2126, 41.5716, 43.5214 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:24:24Z", - "avg_ns": 2011233988, - "stddev_ns": 9929441, - "avg_ts": 254.574215, - "stddev_ts": 1.255992, - "samples_ns": [ - 2010343773, - 2001779833, - 2021578360 - ], - "samples_ts": [ - 254.683, - 255.772, - 253.267 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:24:33Z", - "avg_ns": 12270921518, - "stddev_ns": 485567999, - "avg_ts": 41.768521, - "stddev_ts": 1.663129, - "samples_ns": [ - 12732317404, - 12316106991, - 11764340159 - ], - "samples_ts": [ - 40.2126, - 41.5716, - 43.5214 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 115 - }, - { - "timestamp_utc": "2025-12-08T21:25:22.062144+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:25:10Z\",\n \"avg_ns\": 489417064,\n \"stddev_ns\": 2282695,\n \"avg_ts\": 261.539413,\n \"stddev_ts\": 1.216758,\n \"samples_ns\": [ 487817098, 492030963, 488403132 ],\n \"samples_ts\": [ 262.393, 260.146, 262.079 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:25:13Z\",\n \"avg_ns\": 2847802791,\n \"stddev_ns\": 19740330,\n \"avg_ts\": 44.948377,\n \"stddev_ts\": 0.312514,\n \"samples_ns\": [ 2854168567, 2825665001, 2863574805 ],\n \"samples_ts\": [ 44.8467, 45.2991, 44.6994 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:25:10Z", - "avg_ns": 489417064, - "stddev_ns": 2282695, - "avg_ts": 261.539413, - "stddev_ts": 1.216758, - "samples_ns": [ - 487817098, - 492030963, - 488403132 - ], - "samples_ts": [ - 262.393, - 260.146, - 262.079 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:25:13Z", - "avg_ns": 2847802791, - "stddev_ns": 19740330, - "avg_ts": 44.948377, - "stddev_ts": 0.312514, - "samples_ns": [ - 2854168567, - 2825665001, - 2863574805 - ], - "samples_ts": [ - 44.8467, - 45.2991, - 44.6994 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 116 - }, - { - "timestamp_utc": "2025-12-08T21:26:00.205268+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:25:22Z\",\n \"avg_ns\": 498553847,\n \"stddev_ns\": 2147351,\n \"avg_ts\": 256.745759,\n \"stddev_ts\": 1.107750,\n \"samples_ns\": [ 499200279, 500303604, 496157659 ],\n \"samples_ts\": [ 256.41, 255.845, 257.983 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:25:24Z\",\n \"avg_ns\": 11827195619,\n \"stddev_ns\": 353470695,\n \"avg_ts\": 43.315421,\n \"stddev_ts\": 1.272930,\n \"samples_ns\": [ 11644221682, 12234644510, 11602720665 ],\n \"samples_ts\": [ 43.9703, 41.8484, 44.1276 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:25:22Z", - "avg_ns": 498553847, - "stddev_ns": 2147351, - "avg_ts": 256.745759, - "stddev_ts": 1.10775, - "samples_ns": [ - 499200279, - 500303604, - 496157659 - ], - "samples_ts": [ - 256.41, - 255.845, - 257.983 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:25:24Z", - "avg_ns": 11827195619, - "stddev_ns": 353470695, - "avg_ts": 43.315421, - "stddev_ts": 1.27293, - "samples_ns": [ - 11644221682, - 12234644510, - 11602720665 - ], - "samples_ts": [ - 43.9703, - 41.8484, - 44.1276 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 117 - }, - { - "timestamp_utc": "2025-12-08T21:26:18.519918+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:26:00Z\",\n \"avg_ns\": 2331374450,\n \"stddev_ns\": 227441165,\n \"avg_ts\": 220.974975,\n \"stddev_ts\": 20.966430,\n \"samples_ns\": [ 2134337510, 2279520676, 2580265166 ],\n \"samples_ts\": [ 239.887, 224.609, 198.429 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:26:09Z\",\n \"avg_ns\": 2829896080,\n \"stddev_ns\": 16287643,\n \"avg_ts\": 45.232345,\n \"stddev_ts\": 0.261125,\n \"samples_ns\": [ 2811287142, 2836840907, 2841560191 ],\n \"samples_ts\": [ 45.5307, 45.1206, 45.0457 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:26:00Z", - "avg_ns": 2331374450, - "stddev_ns": 227441165, - "avg_ts": 220.974975, - "stddev_ts": 20.96643, - "samples_ns": [ - 2134337510, - 2279520676, - 2580265166 - ], - "samples_ts": [ - 239.887, - 224.609, - 198.429 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:26:09Z", - "avg_ns": 2829896080, - "stddev_ns": 16287643, - "avg_ts": 45.232345, - "stddev_ts": 0.261125, - "samples_ns": [ - 2811287142, - 2836840907, - 2841560191 - ], - "samples_ts": [ - 45.5307, - 45.1206, - 45.0457 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 118 - }, - { - "timestamp_utc": "2025-12-08T21:27:02.646925+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:26:19Z\",\n \"avg_ns\": 2127285773,\n \"stddev_ns\": 5302720,\n \"avg_ts\": 240.683282,\n \"stddev_ts\": 0.600148,\n \"samples_ns\": [ 2121747000, 2132315296, 2127795024 ],\n \"samples_ts\": [ 241.311, 240.115, 240.625 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:26:27Z\",\n \"avg_ns\": 11624166837,\n \"stddev_ns\": 25664581,\n \"avg_ts\": 44.046311,\n \"stddev_ts\": 0.097197,\n \"samples_ns\": [ 11600801179, 11620064167, 11651635167 ],\n \"samples_ts\": [ 44.1349, 44.0617, 43.9423 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:26:19Z", - "avg_ns": 2127285773, - "stddev_ns": 5302720, - "avg_ts": 240.683282, - "stddev_ts": 0.600148, - "samples_ns": [ - 2121747000, - 2132315296, - 2127795024 - ], - "samples_ts": [ - 241.311, - 240.115, - 240.625 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:26:27Z", - "avg_ns": 11624166837, - "stddev_ns": 25664581, - "avg_ts": 44.046311, - "stddev_ts": 0.097197, - "samples_ns": [ - 11600801179, - 11620064167, - 11651635167 - ], - "samples_ts": [ - 44.1349, - 44.0617, - 43.9423 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 119 - }, - { - "timestamp_utc": "2025-12-08T21:27:14.222713+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:27:03Z\",\n \"avg_ns\": 494979822,\n \"stddev_ns\": 1592343,\n \"avg_ts\": 258.598183,\n \"stddev_ts\": 0.831568,\n \"samples_ns\": [ 496636288, 493460468, 494842710 ],\n \"samples_ts\": [ 257.734, 259.393, 258.668 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:27:05Z\",\n \"avg_ns\": 2827894679,\n \"stddev_ns\": 6249864,\n \"avg_ts\": 45.263502,\n \"stddev_ts\": 0.100076,\n \"samples_ns\": [ 2833731045, 2828652557, 2821300435 ],\n \"samples_ts\": [ 45.1701, 45.2512, 45.3691 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:27:03Z", - "avg_ns": 494979822, - "stddev_ns": 1592343, - "avg_ts": 258.598183, - "stddev_ts": 0.831568, - "samples_ns": [ - 496636288, - 493460468, - 494842710 - ], - "samples_ts": [ - 257.734, - 259.393, - 258.668 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:27:05Z", - "avg_ns": 2827894679, - "stddev_ns": 6249864, - "avg_ts": 45.263502, - "stddev_ts": 0.100076, - "samples_ns": [ - 2833731045, - 2828652557, - 2821300435 - ], - "samples_ts": [ - 45.1701, - 45.2512, - 45.3691 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 120 - }, - { - "timestamp_utc": "2025-12-08T21:27:52.419349+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:27:14Z\",\n \"avg_ns\": 493099945,\n \"stddev_ns\": 2380801,\n \"avg_ts\": 259.586287,\n \"stddev_ts\": 1.249874,\n \"samples_ns\": [ 491799983, 491652120, 495847732 ],\n \"samples_ts\": [ 260.268, 260.347, 258.144 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:27:16Z\",\n \"avg_ns\": 11850589317,\n \"stddev_ns\": 366217708,\n \"avg_ts\": 43.231649,\n \"stddev_ts\": 1.312823,\n \"samples_ns\": [ 12272962099, 11657188721, 11621617131 ],\n \"samples_ts\": [ 41.7177, 43.9214, 44.0558 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:27:14Z", - "avg_ns": 493099945, - "stddev_ns": 2380801, - "avg_ts": 259.586287, - "stddev_ts": 1.249874, - "samples_ns": [ - 491799983, - 491652120, - 495847732 - ], - "samples_ts": [ - 260.268, - 260.347, - 258.144 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:27:16Z", - "avg_ns": 11850589317, - "stddev_ns": 366217708, - "avg_ts": 43.231649, - "stddev_ts": 1.312823, - "samples_ns": [ - 12272962099, - 11657188721, - 11621617131 - ], - "samples_ts": [ - 41.7177, - 43.9214, - 44.0558 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 121 - }, - { - "timestamp_utc": "2025-12-08T21:28:09.782173+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:27:52Z\",\n \"avg_ns\": 2023645113,\n \"stddev_ns\": 8566754,\n \"avg_ts\": 253.011815,\n \"stddev_ts\": 1.072119,\n \"samples_ns\": [ 2031471565, 2014492682, 2024971092 ],\n \"samples_ts\": [ 252.034, 254.158, 252.843 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:28:01Z\",\n \"avg_ns\": 2865755346,\n \"stddev_ns\": 16441019,\n \"avg_ts\": 44.666341,\n \"stddev_ts\": 0.255888,\n \"samples_ns\": [ 2883427143, 2850912372, 2862926524 ],\n \"samples_ts\": [ 44.3916, 44.8979, 44.7095 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:27:52Z", - "avg_ns": 2023645113, - "stddev_ns": 8566754, - "avg_ts": 253.011815, - "stddev_ts": 1.072119, - "samples_ns": [ - 2031471565, - 2014492682, - 2024971092 - ], - "samples_ts": [ - 252.034, - 254.158, - 252.843 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:28:01Z", - "avg_ns": 2865755346, - "stddev_ns": 16441019, - "avg_ts": 44.666341, - "stddev_ts": 0.255888, - "samples_ns": [ - 2883427143, - 2850912372, - 2862926524 - ], - "samples_ts": [ - 44.3916, - 44.8979, - 44.7095 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 122 - }, - { - "timestamp_utc": "2025-12-08T21:28:54.813137+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:28:10Z\",\n \"avg_ns\": 2018620828,\n \"stddev_ns\": 5330147,\n \"avg_ts\": 253.639698,\n \"stddev_ts\": 0.668927,\n \"samples_ns\": [ 2014401317, 2016850475, 2024610693 ],\n \"samples_ts\": [ 254.17, 253.861, 252.888 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:28:18Z\",\n \"avg_ns\": 12091529207,\n \"stddev_ns\": 334689778,\n \"avg_ts\": 42.365678,\n \"stddev_ts\": 1.191399,\n \"samples_ns\": [ 12305000358, 11705795806, 12263791458 ],\n \"samples_ts\": [ 41.6091, 43.739, 41.7489 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:28:10Z", - "avg_ns": 2018620828, - "stddev_ns": 5330147, - "avg_ts": 253.639698, - "stddev_ts": 0.668927, - "samples_ns": [ - 2014401317, - 2016850475, - 2024610693 - ], - "samples_ts": [ - 254.17, - 253.861, - 252.888 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:28:18Z", - "avg_ns": 12091529207, - "stddev_ns": 334689778, - "avg_ts": 42.365678, - "stddev_ts": 1.191399, - "samples_ns": [ - 12305000358, - 11705795806, - 12263791458 - ], - "samples_ts": [ - 41.6091, - 43.739, - 41.7489 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 123 - }, - { - "timestamp_utc": "2025-12-08T21:29:06.663117+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:28:55Z\",\n \"avg_ns\": 496284865,\n \"stddev_ns\": 3166949,\n \"avg_ts\": 257.923379,\n \"stddev_ts\": 1.643496,\n \"samples_ns\": [ 493387073, 495802266, 499665258 ],\n \"samples_ts\": [ 259.431, 258.167, 256.172 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:28:57Z\",\n \"avg_ns\": 3060779736,\n \"stddev_ns\": 360443712,\n \"avg_ts\": 42.185085,\n \"stddev_ts\": 4.658261,\n \"samples_ns\": [ 2829999083, 2876211919, 3476128206 ],\n \"samples_ts\": [ 45.2297, 44.503, 36.8226 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:28:55Z", - "avg_ns": 496284865, - "stddev_ns": 3166949, - "avg_ts": 257.923379, - "stddev_ts": 1.643496, - "samples_ns": [ - 493387073, - 495802266, - 499665258 - ], - "samples_ts": [ - 259.431, - 258.167, - 256.172 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:28:57Z", - "avg_ns": 3060779736, - "stddev_ns": 360443712, - "avg_ts": 42.185085, - "stddev_ts": 4.658261, - "samples_ns": [ - 2829999083, - 2876211919, - 3476128206 - ], - "samples_ts": [ - 45.2297, - 44.503, - 36.8226 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 124 - }, - { - "timestamp_utc": "2025-12-08T21:29:44.295240+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:29:07Z\",\n \"avg_ns\": 498362988,\n \"stddev_ns\": 2208588,\n \"avg_ts\": 256.844272,\n \"stddev_ts\": 1.140306,\n \"samples_ns\": [ 500153633, 499040103, 495895229 ],\n \"samples_ts\": [ 255.921, 256.492, 258.119 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:29:09Z\",\n \"avg_ns\": 11648421219,\n \"stddev_ns\": 33200496,\n \"avg_ts\": 43.954693,\n \"stddev_ts\": 0.125442,\n \"samples_ns\": [ 11674422830, 11611023724, 11659817104 ],\n \"samples_ts\": [ 43.8566, 44.096, 43.9115 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:29:07Z", - "avg_ns": 498362988, - "stddev_ns": 2208588, - "avg_ts": 256.844272, - "stddev_ts": 1.140306, - "samples_ns": [ - 500153633, - 499040103, - 495895229 - ], - "samples_ts": [ - 255.921, - 256.492, - 258.119 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:29:09Z", - "avg_ns": 11648421219, - "stddev_ns": 33200496, - "avg_ts": 43.954693, - "stddev_ts": 0.125442, - "samples_ns": [ - 11674422830, - 11611023724, - 11659817104 - ], - "samples_ts": [ - 43.8566, - 44.096, - 43.9115 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 125 - }, - { - "timestamp_utc": "2025-12-08T21:30:01.521732+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:29:44Z\",\n \"avg_ns\": 2017633656,\n \"stddev_ns\": 12182399,\n \"avg_ts\": 253.768790,\n \"stddev_ts\": 1.533038,\n \"samples_ns\": [ 2018305853, 2005129075, 2029466040 ],\n \"samples_ts\": [ 253.678, 255.345, 252.283 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:29:52Z\",\n \"avg_ns\": 2832730487,\n \"stddev_ns\": 7192275,\n \"avg_ts\": 45.186279,\n \"stddev_ts\": 0.114557,\n \"samples_ns\": [ 2828456250, 2828701232, 2841033980 ],\n \"samples_ts\": [ 45.2544, 45.2504, 45.054 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:29:44Z", - "avg_ns": 2017633656, - "stddev_ns": 12182399, - "avg_ts": 253.76879, - "stddev_ts": 1.533038, - "samples_ns": [ - 2018305853, - 2005129075, - 2029466040 - ], - "samples_ts": [ - 253.678, - 255.345, - 252.283 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:29:52Z", - "avg_ns": 2832730487, - "stddev_ns": 7192275, - "avg_ts": 45.186279, - "stddev_ts": 0.114557, - "samples_ns": [ - 2828456250, - 2828701232, - 2841033980 - ], - "samples_ts": [ - 45.2544, - 45.2504, - 45.054 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 126 - }, - { - "timestamp_utc": "2025-12-08T21:30:46.365094+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:30:02Z\",\n \"avg_ns\": 2034022711,\n \"stddev_ns\": 2954270,\n \"avg_ts\": 251.718290,\n \"stddev_ts\": 0.365412,\n \"samples_ns\": [ 2033450642, 2037220830, 2031396662 ],\n \"samples_ts\": [ 251.789, 251.323, 252.043 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:30:10Z\",\n \"avg_ns\": 11903711805,\n \"stddev_ns\": 356940656,\n \"avg_ts\": 43.037332,\n \"stddev_ts\": 1.277652,\n \"samples_ns\": [ 12296230581, 11598579142, 11816325692 ],\n \"samples_ts\": [ 41.6388, 44.1433, 43.3299 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:30:02Z", - "avg_ns": 2034022711, - "stddev_ns": 2954270, - "avg_ts": 251.71829, - "stddev_ts": 0.365412, - "samples_ns": [ - 2033450642, - 2037220830, - 2031396662 - ], - "samples_ts": [ - 251.789, - 251.323, - 252.043 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:30:10Z", - "avg_ns": 11903711805, - "stddev_ns": 356940656, - "avg_ts": 43.037332, - "stddev_ts": 1.277652, - "samples_ns": [ - 12296230581, - 11598579142, - 11816325692 - ], - "samples_ts": [ - 41.6388, - 44.1433, - 43.3299 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 127 - }, - { - "timestamp_utc": "2025-12-08T21:30:57.690603+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:30:47Z\",\n \"avg_ns\": 495146577,\n \"stddev_ns\": 3576260,\n \"avg_ts\": 258.518270,\n \"stddev_ts\": 1.860833,\n \"samples_ns\": [ 492395102, 493855635, 499188995 ],\n \"samples_ts\": [ 259.954, 259.185, 256.416 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:30:49Z\",\n \"avg_ns\": 2846075809,\n \"stddev_ns\": 10977668,\n \"avg_ts\": 44.974651,\n \"stddev_ts\": 0.173091,\n \"samples_ns\": [ 2839225342, 2840264564, 2858737521 ],\n \"samples_ts\": [ 45.0827, 45.0662, 44.775 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:30:47Z", - "avg_ns": 495146577, - "stddev_ns": 3576260, - "avg_ts": 258.51827, - "stddev_ts": 1.860833, - "samples_ns": [ - 492395102, - 493855635, - 499188995 - ], - "samples_ts": [ - 259.954, - 259.185, - 256.416 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:30:49Z", - "avg_ns": 2846075809, - "stddev_ns": 10977668, - "avg_ts": 44.974651, - "stddev_ts": 0.173091, - "samples_ns": [ - 2839225342, - 2840264564, - 2858737521 - ], - "samples_ts": [ - 45.0827, - 45.0662, - 44.775 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 128 - }, - { - "timestamp_utc": "2025-12-08T21:31:35.396792+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:30:58Z\",\n \"avg_ns\": 497603592,\n \"stddev_ns\": 636374,\n \"avg_ts\": 257.233150,\n \"stddev_ts\": 0.329056,\n \"samples_ns\": [ 496927806, 498191401, 497691569 ],\n \"samples_ts\": [ 257.583, 256.929, 257.187 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:31:00Z\",\n \"avg_ns\": 11676651924,\n \"stddev_ns\": 58743134,\n \"avg_ts\": 43.848926,\n \"stddev_ts\": 0.220775,\n \"samples_ns\": [ 11731972008, 11682984512, 11614999254 ],\n \"samples_ts\": [ 43.6414, 43.8244, 44.0809 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:30:58Z", - "avg_ns": 497603592, - "stddev_ns": 636374, - "avg_ts": 257.23315, - "stddev_ts": 0.329056, - "samples_ns": [ - 496927806, - 498191401, - 497691569 - ], - "samples_ts": [ - 257.583, - 256.929, - 257.187 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:31:00Z", - "avg_ns": 11676651924, - "stddev_ns": 58743134, - "avg_ts": 43.848926, - "stddev_ts": 0.220775, - "samples_ns": [ - 11731972008, - 11682984512, - 11614999254 - ], - "samples_ts": [ - 43.6414, - 43.8244, - 44.0809 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 129 - }, - { - "timestamp_utc": "2025-12-08T21:31:53.098046+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:31:35Z\",\n \"avg_ns\": 2131013928,\n \"stddev_ns\": 678574,\n \"avg_ts\": 240.261233,\n \"stddev_ts\": 0.076315,\n \"samples_ns\": [ 2130658646, 2130588525, 2131794614 ],\n \"samples_ts\": [ 240.301, 240.309, 240.173 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:31:44Z\",\n \"avg_ns\": 2833632094,\n \"stddev_ns\": 7608090,\n \"avg_ts\": 45.171925,\n \"stddev_ts\": 0.121346,\n \"samples_ns\": [ 2834661406, 2825561937, 2840672940 ],\n \"samples_ts\": [ 45.1553, 45.3007, 45.0597 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:31:35Z", - "avg_ns": 2131013928, - "stddev_ns": 678574, - "avg_ts": 240.261233, - "stddev_ts": 0.076315, - "samples_ns": [ - 2130658646, - 2130588525, - 2131794614 - ], - "samples_ts": [ - 240.301, - 240.309, - 240.173 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:31:44Z", - "avg_ns": 2833632094, - "stddev_ns": 7608090, - "avg_ts": 45.171925, - "stddev_ts": 0.121346, - "samples_ns": [ - 2834661406, - 2825561937, - 2840672940 - ], - "samples_ts": [ - 45.1553, - 45.3007, - 45.0597 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 130 - }, - { - "timestamp_utc": "2025-12-08T21:32:37.867332+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:31:53Z\",\n \"avg_ns\": 2133009732,\n \"stddev_ns\": 18306409,\n \"avg_ts\": 240.048170,\n \"stddev_ts\": 2.055234,\n \"samples_ns\": [ 2116777012, 2129400198, 2152851987 ],\n \"samples_ts\": [ 241.877, 240.443, 237.824 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:32:02Z\",\n \"avg_ns\": 11853239356,\n \"stddev_ns\": 328626586,\n \"avg_ts\": 43.216760,\n \"stddev_ts\": 1.180384,\n \"samples_ns\": [ 11701479541, 12230320446, 11627918081 ],\n \"samples_ts\": [ 43.7552, 41.8632, 44.032 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:31:53Z", - "avg_ns": 2133009732, - "stddev_ns": 18306409, - "avg_ts": 240.04817, - "stddev_ts": 2.055234, - "samples_ns": [ - 2116777012, - 2129400198, - 2152851987 - ], - "samples_ts": [ - 241.877, - 240.443, - 237.824 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:32:02Z", - "avg_ns": 11853239356, - "stddev_ns": 328626586, - "avg_ts": 43.21676, - "stddev_ts": 1.180384, - "samples_ns": [ - 11701479541, - 12230320446, - 11627918081 - ], - "samples_ts": [ - 43.7552, - 41.8632, - 44.032 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 131 - }, - { - "timestamp_utc": "2025-12-08T21:32:48.974028+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:32:38Z\",\n \"avg_ns\": 492306084,\n \"stddev_ns\": 227309,\n \"avg_ts\": 260.000886,\n \"stddev_ts\": 0.120078,\n \"samples_ns\": [ 492408910, 492463811, 492045531 ],\n \"samples_ts\": [ 259.947, 259.918, 260.139 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:32:40Z\",\n \"avg_ns\": 2821223358,\n \"stddev_ns\": 19775784,\n \"avg_ts\": 45.371881,\n \"stddev_ts\": 0.319321,\n \"samples_ns\": [ 2798413426, 2831699930, 2833556718 ],\n \"samples_ts\": [ 45.7402, 45.2025, 45.1729 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:32:38Z", - "avg_ns": 492306084, - "stddev_ns": 227309, - "avg_ts": 260.000886, - "stddev_ts": 0.120078, - "samples_ns": [ - 492408910, - 492463811, - 492045531 - ], - "samples_ts": [ - 259.947, - 259.918, - 260.139 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:32:40Z", - "avg_ns": 2821223358, - "stddev_ns": 19775784, - "avg_ts": 45.371881, - "stddev_ts": 0.319321, - "samples_ns": [ - 2798413426, - 2831699930, - 2833556718 - ], - "samples_ts": [ - 45.7402, - 45.2025, - 45.1729 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 132 - }, - { - "timestamp_utc": "2025-12-08T21:33:26.742172+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:32:49Z\",\n \"avg_ns\": 492659190,\n \"stddev_ns\": 2567802,\n \"avg_ts\": 259.819216,\n \"stddev_ts\": 1.358140,\n \"samples_ns\": [ 493996268, 494282331, 489698973 ],\n \"samples_ts\": [ 259.111, 258.961, 261.385 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:32:51Z\",\n \"avg_ns\": 11708273903,\n \"stddev_ns\": 6953655,\n \"avg_ts\": 43.729770,\n \"stddev_ts\": 0.025968,\n \"samples_ns\": [ 11700935313, 11709125626, 11714760772 ],\n \"samples_ts\": [ 43.7572, 43.7266, 43.7055 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:32:49Z", - "avg_ns": 492659190, - "stddev_ns": 2567802, - "avg_ts": 259.819216, - "stddev_ts": 1.35814, - "samples_ns": [ - 493996268, - 494282331, - 489698973 - ], - "samples_ts": [ - 259.111, - 258.961, - 261.385 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:32:51Z", - "avg_ns": 11708273903, - "stddev_ns": 6953655, - "avg_ts": 43.72977, - "stddev_ts": 0.025968, - "samples_ns": [ - 11700935313, - 11709125626, - 11714760772 - ], - "samples_ts": [ - 43.7572, - 43.7266, - 43.7055 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 133 - }, - { - "timestamp_utc": "2025-12-08T21:33:44.627268+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:33:27Z\",\n \"avg_ns\": 2213495698,\n \"stddev_ns\": 341297486,\n \"avg_ts\": 234.724549,\n \"stddev_ts\": 33.234012,\n \"samples_ns\": [ 2014591218, 2607586235, 2018309643 ],\n \"samples_ts\": [ 254.146, 196.35, 253.678 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:33:36Z\",\n \"avg_ns\": 2847341717,\n \"stddev_ns\": 12362359,\n \"avg_ts\": 44.954776,\n \"stddev_ts\": 0.195163,\n \"samples_ns\": [ 2847179094, 2859784356, 2835061703 ],\n \"samples_ts\": [ 44.9568, 44.7586, 45.1489 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:33:27Z", - "avg_ns": 2213495698, - "stddev_ns": 341297486, - "avg_ts": 234.724549, - "stddev_ts": 33.234012, - "samples_ns": [ - 2014591218, - 2607586235, - 2018309643 - ], - "samples_ts": [ - 254.146, - 196.35, - 253.678 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:33:36Z", - "avg_ns": 2847341717, - "stddev_ns": 12362359, - "avg_ts": 44.954776, - "stddev_ts": 0.195163, - "samples_ns": [ - 2847179094, - 2859784356, - 2835061703 - ], - "samples_ts": [ - 44.9568, - 44.7586, - 45.1489 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 134 - }, - { - "timestamp_utc": "2025-12-08T21:34:29.084742+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:33:45Z\",\n \"avg_ns\": 2019766989,\n \"stddev_ns\": 2737778,\n \"avg_ts\": 253.494898,\n \"stddev_ts\": 0.343475,\n \"samples_ns\": [ 2017347527, 2019214595, 2022738845 ],\n \"samples_ts\": [ 253.799, 253.564, 253.122 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:33:53Z\",\n \"avg_ns\": 11899458025,\n \"stddev_ns\": 310071070,\n \"avg_ts\": 43.046367,\n \"stddev_ts\": 1.105087,\n \"samples_ns\": [ 11714541883, 11726400448, 12257431746 ],\n \"samples_ts\": [ 43.7064, 43.6622, 41.7706 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:33:45Z", - "avg_ns": 2019766989, - "stddev_ns": 2737778, - "avg_ts": 253.494898, - "stddev_ts": 0.343475, - "samples_ns": [ - 2017347527, - 2019214595, - 2022738845 - ], - "samples_ts": [ - 253.799, - 253.564, - 253.122 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:33:53Z", - "avg_ns": 11899458025, - "stddev_ns": 310071070, - "avg_ts": 43.046367, - "stddev_ts": 1.105087, - "samples_ns": [ - 11714541883, - 11726400448, - 12257431746 - ], - "samples_ts": [ - 43.7064, - 43.6622, - 41.7706 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 135 - }, - { - "timestamp_utc": "2025-12-08T21:34:40.277357+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:34:29Z\",\n \"avg_ns\": 497164625,\n \"stddev_ns\": 1179444,\n \"avg_ts\": 257.460958,\n \"stddev_ts\": 0.611487,\n \"samples_ns\": [ 495807783, 497743920, 497942173 ],\n \"samples_ts\": [ 258.165, 257.16, 257.058 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:34:31Z\",\n \"avg_ns\": 2845901419,\n \"stddev_ns\": 6293717,\n \"avg_ts\": 44.977109,\n \"stddev_ts\": 0.099451,\n \"samples_ns\": [ 2852344717, 2839768789, 2845590751 ],\n \"samples_ts\": [ 44.8754, 45.0741, 44.9819 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:34:29Z", - "avg_ns": 497164625, - "stddev_ns": 1179444, - "avg_ts": 257.460958, - "stddev_ts": 0.611487, - "samples_ns": [ - 495807783, - 497743920, - 497942173 - ], - "samples_ts": [ - 258.165, - 257.16, - 257.058 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:34:31Z", - "avg_ns": 2845901419, - "stddev_ns": 6293717, - "avg_ts": 44.977109, - "stddev_ts": 0.099451, - "samples_ns": [ - 2852344717, - 2839768789, - 2845590751 - ], - "samples_ts": [ - 44.8754, - 45.0741, - 44.9819 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 136 - }, - { - "timestamp_utc": "2025-12-08T21:35:18.475128+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:34:40Z\",\n \"avg_ns\": 494383981,\n \"stddev_ns\": 2048120,\n \"avg_ts\": 258.911033,\n \"stddev_ts\": 1.074638,\n \"samples_ns\": [ 492063104, 495152051, 495936790 ],\n \"samples_ts\": [ 260.129, 258.506, 258.097 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:34:42Z\",\n \"avg_ns\": 11851095680,\n \"stddev_ns\": 335900617,\n \"avg_ts\": 43.225535,\n \"stddev_ts\": 1.205519,\n \"samples_ns\": [ 12238779371, 11667512004, 11646995666 ],\n \"samples_ts\": [ 41.8342, 43.8825, 43.9598 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:34:40Z", - "avg_ns": 494383981, - "stddev_ns": 2048120, - "avg_ts": 258.911033, - "stddev_ts": 1.074638, - "samples_ns": [ - 492063104, - 495152051, - 495936790 - ], - "samples_ts": [ - 260.129, - 258.506, - 258.097 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:34:42Z", - "avg_ns": 11851095680, - "stddev_ns": 335900617, - "avg_ts": 43.225535, - "stddev_ts": 1.205519, - "samples_ns": [ - 12238779371, - 11667512004, - 11646995666 - ], - "samples_ts": [ - 41.8342, - 43.8825, - 43.9598 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 137 - }, - { - "timestamp_utc": "2025-12-08T21:35:36.388266+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:35:19Z\",\n \"avg_ns\": 2218036957,\n \"stddev_ns\": 324995986,\n \"avg_ts\": 233.922858,\n \"stddev_ts\": 31.614752,\n \"samples_ns\": [ 2040189460, 2593142620, 2020778793 ],\n \"samples_ts\": [ 250.957, 197.444, 253.368 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:35:27Z\",\n \"avg_ns\": 2850094443,\n \"stddev_ns\": 7246402,\n \"avg_ts\": 44.910986,\n \"stddev_ts\": 0.114348,\n \"samples_ns\": [ 2854398894, 2841728650, 2854155787 ],\n \"samples_ts\": [ 44.8431, 45.043, 44.8469 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:35:19Z", - "avg_ns": 2218036957, - "stddev_ns": 324995986, - "avg_ts": 233.922858, - "stddev_ts": 31.614752, - "samples_ns": [ - 2040189460, - 2593142620, - 2020778793 - ], - "samples_ts": [ - 250.957, - 197.444, - 253.368 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:35:27Z", - "avg_ns": 2850094443, - "stddev_ns": 7246402, - "avg_ts": 44.910986, - "stddev_ts": 0.114348, - "samples_ns": [ - 2854398894, - 2841728650, - 2854155787 - ], - "samples_ts": [ - 44.8431, - 45.043, - 44.8469 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 138 - }, - { - "timestamp_utc": "2025-12-08T21:36:20.109917+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:35:36Z\",\n \"avg_ns\": 2013341637,\n \"stddev_ns\": 10303135,\n \"avg_ts\": 254.308037,\n \"stddev_ts\": 1.304526,\n \"samples_ns\": [ 2021303957, 2001705041, 2017015913 ],\n \"samples_ts\": [ 253.302, 255.782, 253.84 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:35:45Z\",\n \"avg_ns\": 11670047527,\n \"stddev_ns\": 48594018,\n \"avg_ts\": 43.873506,\n \"stddev_ts\": 0.182257,\n \"samples_ns\": [ 11639168138, 11726060790, 11644913654 ],\n \"samples_ts\": [ 43.9894, 43.6634, 43.9677 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:35:36Z", - "avg_ns": 2013341637, - "stddev_ns": 10303135, - "avg_ts": 254.308037, - "stddev_ts": 1.304526, - "samples_ns": [ - 2021303957, - 2001705041, - 2017015913 - ], - "samples_ts": [ - 253.302, - 255.782, - 253.84 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:35:45Z", - "avg_ns": 11670047527, - "stddev_ns": 48594018, - "avg_ts": 43.873506, - "stddev_ts": 0.182257, - "samples_ns": [ - 11639168138, - 11726060790, - 11644913654 - ], - "samples_ts": [ - 43.9894, - 43.6634, - 43.9677 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 139 - }, - { - "timestamp_utc": "2025-12-08T21:36:31.212696+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:36:20Z\",\n \"avg_ns\": 493058298,\n \"stddev_ns\": 1957911,\n \"avg_ts\": 259.606915,\n \"stddev_ts\": 1.029410,\n \"samples_ns\": [ 491403877, 495219895, 492551122 ],\n \"samples_ts\": [ 260.478, 258.471, 259.872 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:36:22Z\",\n \"avg_ns\": 2821436253,\n \"stddev_ns\": 14894183,\n \"avg_ts\": 45.367807,\n \"stddev_ts\": 0.239134,\n \"samples_ns\": [ 2808238428, 2818485458, 2837584874 ],\n \"samples_ts\": [ 45.5802, 45.4145, 45.1088 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:36:20Z", - "avg_ns": 493058298, - "stddev_ns": 1957911, - "avg_ts": 259.606915, - "stddev_ts": 1.02941, - "samples_ns": [ - 491403877, - 495219895, - 492551122 - ], - "samples_ts": [ - 260.478, - 258.471, - 259.872 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:36:22Z", - "avg_ns": 2821436253, - "stddev_ns": 14894183, - "avg_ts": 45.367807, - "stddev_ts": 0.239134, - "samples_ns": [ - 2808238428, - 2818485458, - 2837584874 - ], - "samples_ts": [ - 45.5802, - 45.4145, - 45.1088 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 140 - }, - { - "timestamp_utc": "2025-12-08T21:37:09.300701+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:36:31Z\",\n \"avg_ns\": 498579047,\n \"stddev_ns\": 4805668,\n \"avg_ts\": 256.745539,\n \"stddev_ts\": 2.480379,\n \"samples_ns\": [ 493432938, 502949774, 499354431 ],\n \"samples_ts\": [ 259.407, 254.499, 256.331 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:36:33Z\",\n \"avg_ns\": 11809875581,\n \"stddev_ns\": 379385205,\n \"avg_ts\": 43.382852,\n \"stddev_ts\": 1.368343,\n \"samples_ns\": [ 11581540177, 11600268161, 12247818406 ],\n \"samples_ts\": [ 44.2083, 44.1369, 41.8034 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:36:31Z", - "avg_ns": 498579047, - "stddev_ns": 4805668, - "avg_ts": 256.745539, - "stddev_ts": 2.480379, - "samples_ns": [ - 493432938, - 502949774, - 499354431 - ], - "samples_ts": [ - 259.407, - 254.499, - 256.331 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:36:33Z", - "avg_ns": 11809875581, - "stddev_ns": 379385205, - "avg_ts": 43.382852, - "stddev_ts": 1.368343, - "samples_ns": [ - 11581540177, - 11600268161, - 12247818406 - ], - "samples_ts": [ - 44.2083, - 44.1369, - 41.8034 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 141 - }, - { - "timestamp_utc": "2025-12-08T21:37:28.372808+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:37:09Z\",\n \"avg_ns\": 2322246253,\n \"stddev_ns\": 246198916,\n \"avg_ts\": 222.076480,\n \"stddev_ts\": 22.666421,\n \"samples_ns\": [ 2251509452, 2596070384, 2119158923 ],\n \"samples_ts\": [ 227.403, 197.221, 241.605 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:37:18Z\",\n \"avg_ns\": 3099381827,\n \"stddev_ns\": 7733273,\n \"avg_ts\": 41.298729,\n \"stddev_ts\": 0.103008,\n \"samples_ns\": [ 3107416910, 3098737031, 3091991542 ],\n \"samples_ts\": [ 41.1918, 41.3072, 41.3973 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:37:09Z", - "avg_ns": 2322246253, - "stddev_ns": 246198916, - "avg_ts": 222.07648, - "stddev_ts": 22.666421, - "samples_ns": [ - 2251509452, - 2596070384, - 2119158923 - ], - "samples_ts": [ - 227.403, - 197.221, - 241.605 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:37:18Z", - "avg_ns": 3099381827, - "stddev_ns": 7733273, - "avg_ts": 41.298729, - "stddev_ts": 0.103008, - "samples_ns": [ - 3107416910, - 3098737031, - 3091991542 - ], - "samples_ts": [ - 41.1918, - 41.3072, - 41.3973 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 142 - }, - { - "timestamp_utc": "2025-12-08T21:38:15.889526+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:37:28Z\",\n \"avg_ns\": 2326568199,\n \"stddev_ns\": 354342168,\n \"avg_ts\": 223.239924,\n \"stddev_ts\": 31.253433,\n \"samples_ns\": [ 2118466032, 2125531618, 2735706949 ],\n \"samples_ts\": [ 241.684, 240.881, 187.155 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:37:38Z\",\n \"avg_ns\": 12581414999,\n \"stddev_ns\": 347347046,\n \"avg_ts\": 40.715313,\n \"stddev_ts\": 1.106597,\n \"samples_ns\": [ 12365380154, 12396777926, 12982086919 ],\n \"samples_ts\": [ 41.4059, 41.3011, 39.439 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:37:28Z", - "avg_ns": 2326568199, - "stddev_ns": 354342168, - "avg_ts": 223.239924, - "stddev_ts": 31.253433, - "samples_ns": [ - 2118466032, - 2125531618, - 2735706949 - ], - "samples_ts": [ - 241.684, - 240.881, - 187.155 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_type": "gemma3 270M Q8_0", - "model_size": 285018624, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:37:38Z", - "avg_ns": 12581414999, - "stddev_ns": 347347046, - "avg_ts": 40.715313, - "stddev_ts": 1.106597, - "samples_ns": [ - 12365380154, - 12396777926, - 12982086919 - ], - "samples_ts": [ - 41.4059, - 41.3011, - 39.439 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 143 - }, - { - "timestamp_utc": "2025-12-08T21:38:35.528304+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:38:17Z\",\n \"avg_ns\": 987494991,\n \"stddev_ns\": 1777260,\n \"avg_ts\": 129.621190,\n \"stddev_ts\": 0.233136,\n \"samples_ns\": [ 989457186, 985993320, 987034467 ],\n \"samples_ts\": [ 129.364, 129.818, 129.681 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:38:21Z\",\n \"avg_ns\": 4723068809,\n \"stddev_ns\": 399310868,\n \"avg_ts\": 27.224723,\n \"stddev_ts\": 2.194691,\n \"samples_ns\": [ 5184108066, 4498140943, 4486957419 ],\n \"samples_ts\": [ 24.6908, 28.4562, 28.5271 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:38:17Z", - "avg_ns": 987494991, - "stddev_ns": 1777260, - "avg_ts": 129.62119, - "stddev_ts": 0.233136, - "samples_ns": [ - 989457186, - 985993320, - 987034467 - ], - "samples_ts": [ - 129.364, - 129.818, - 129.681 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:38:21Z", - "avg_ns": 4723068809, - "stddev_ns": 399310868, - "avg_ts": 27.224723, - "stddev_ts": 2.194691, - "samples_ns": [ - 5184108066, - 4498140943, - 4486957419 - ], - "samples_ts": [ - 24.6908, - 28.4562, - 28.5271 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 144 - }, - { - "timestamp_utc": "2025-12-08T21:39:35.378911+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:38:36Z\",\n \"avg_ns\": 990718959,\n \"stddev_ns\": 985425,\n \"avg_ts\": 129.199187,\n \"stddev_ts\": 0.128308,\n \"samples_ns\": [ 990047924, 991849062, 990259893 ],\n \"samples_ts\": [ 129.287, 129.052, 129.259 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:38:40Z\",\n \"avg_ns\": 18386325318,\n \"stddev_ns\": 57390748,\n \"avg_ts\": 27.846963,\n \"stddev_ts\": 0.086991,\n \"samples_ns\": [ 18324516767, 18437929239, 18396529948 ],\n \"samples_ts\": [ 27.9407, 27.7688, 27.8313 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:38:36Z", - "avg_ns": 990718959, - "stddev_ns": 985425, - "avg_ts": 129.199187, - "stddev_ts": 0.128308, - "samples_ns": [ - 990047924, - 991849062, - 990259893 - ], - "samples_ts": [ - 129.287, - 129.052, - 129.259 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:38:40Z", - "avg_ns": 18386325318, - "stddev_ns": 57390748, - "avg_ts": 27.846963, - "stddev_ts": 0.086991, - "samples_ns": [ - 18324516767, - 18437929239, - 18396529948 - ], - "samples_ts": [ - 27.9407, - 27.7688, - 27.8313 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 145 - }, - { - "timestamp_utc": "2025-12-08T21:40:06.020941+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:39:35Z\",\n \"avg_ns\": 4058196063,\n \"stddev_ns\": 5440965,\n \"avg_ts\": 126.164583,\n \"stddev_ts\": 0.169278,\n \"samples_ns\": [ 4051948251, 4060747480, 4061892458 ],\n \"samples_ts\": [ 126.359, 126.085, 126.05 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:39:52Z\",\n \"avg_ns\": 4561687431,\n \"stddev_ns\": 22600214,\n \"avg_ts\": 28.060252,\n \"stddev_ts\": 0.139226,\n \"samples_ns\": [ 4566364498, 4537114714, 4581583082 ],\n \"samples_ts\": [ 28.0311, 28.2118, 27.9379 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:39:35Z", - "avg_ns": 4058196063, - "stddev_ns": 5440965, - "avg_ts": 126.164583, - "stddev_ts": 0.169278, - "samples_ns": [ - 4051948251, - 4060747480, - 4061892458 - ], - "samples_ts": [ - 126.359, - 126.085, - 126.05 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:39:52Z", - "avg_ns": 4561687431, - "stddev_ns": 22600214, - "avg_ts": 28.060252, - "stddev_ts": 0.139226, - "samples_ns": [ - 4566364498, - 4537114714, - 4581583082 - ], - "samples_ts": [ - 28.0311, - 28.2118, - 27.9379 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 146 - }, - { - "timestamp_utc": "2025-12-08T21:41:23.434478+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:40:06Z\",\n \"avg_ns\": 4210017791,\n \"stddev_ns\": 143844573,\n \"avg_ts\": 121.710130,\n \"stddev_ts\": 4.190766,\n \"samples_ns\": [ 4230652019, 4342430943, 4056970412 ],\n \"samples_ts\": [ 121.022, 117.906, 126.203 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:40:23Z\",\n \"avg_ns\": 20002618365,\n \"stddev_ns\": 31471970,\n \"avg_ts\": 25.596691,\n \"stddev_ts\": 0.040247,\n \"samples_ns\": [ 19976950205, 19993174114, 20037730777 ],\n \"samples_ts\": [ 25.6295, 25.6087, 25.5518 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:40:06Z", - "avg_ns": 4210017791, - "stddev_ns": 143844573, - "avg_ts": 121.71013, - "stddev_ts": 4.190766, - "samples_ns": [ - 4230652019, - 4342430943, - 4056970412 - ], - "samples_ts": [ - 121.022, - 117.906, - 126.203 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:40:23Z", - "avg_ns": 20002618365, - "stddev_ns": 31471970, - "avg_ts": 25.596691, - "stddev_ts": 0.040247, - "samples_ns": [ - 19976950205, - 19993174114, - 20037730777 - ], - "samples_ts": [ - 25.6295, - 25.6087, - 25.5518 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 147 - }, - { - "timestamp_utc": "2025-12-08T21:41:41.688138+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:41:24Z\",\n \"avg_ns\": 991722721,\n \"stddev_ns\": 2856165,\n \"avg_ts\": 129.069049,\n \"stddev_ts\": 0.372164,\n \"samples_ns\": [ 993954209, 988504349, 992709607 ],\n \"samples_ts\": [ 128.779, 129.489, 128.94 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:41:28Z\",\n \"avg_ns\": 4528113471,\n \"stddev_ns\": 43694907,\n \"avg_ts\": 28.269588,\n \"stddev_ts\": 0.271301,\n \"samples_ns\": [ 4578495791, 4505258296, 4500586327 ],\n \"samples_ts\": [ 27.9568, 28.4112, 28.4407 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:41:24Z", - "avg_ns": 991722721, - "stddev_ns": 2856165, - "avg_ts": 129.069049, - "stddev_ts": 0.372164, - "samples_ns": [ - 993954209, - 988504349, - 992709607 - ], - "samples_ts": [ - 128.779, - 129.489, - 128.94 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:41:28Z", - "avg_ns": 4528113471, - "stddev_ns": 43694907, - "avg_ts": 28.269588, - "stddev_ts": 0.271301, - "samples_ns": [ - 4578495791, - 4505258296, - 4500586327 - ], - "samples_ts": [ - 27.9568, - 28.4112, - 28.4407 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 148 - }, - { - "timestamp_utc": "2025-12-08T21:42:42.224977+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:41:42Z\",\n \"avg_ns\": 989668256,\n \"stddev_ns\": 132773,\n \"avg_ts\": 129.336271,\n \"stddev_ts\": 0.017353,\n \"samples_ns\": [ 989732589, 989756608, 989515571 ],\n \"samples_ts\": [ 129.328, 129.325, 129.356 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:41:46Z\",\n \"avg_ns\": 18628611507,\n \"stddev_ns\": 452043514,\n \"avg_ts\": 27.495248,\n \"stddev_ts\": 0.658013,\n \"samples_ns\": [ 19150408308, 18355905791, 18379520422 ],\n \"samples_ts\": [ 26.7357, 27.8929, 27.8571 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:41:42Z", - "avg_ns": 989668256, - "stddev_ns": 132773, - "avg_ts": 129.336271, - "stddev_ts": 0.017353, - "samples_ns": [ - 989732589, - 989756608, - 989515571 - ], - "samples_ts": [ - 129.328, - 129.325, - 129.356 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:41:46Z", - "avg_ns": 18628611507, - "stddev_ns": 452043514, - "avg_ts": 27.495248, - "stddev_ts": 0.658013, - "samples_ns": [ - 19150408308, - 18355905791, - 18379520422 - ], - "samples_ts": [ - 26.7357, - 27.8929, - 27.8571 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 149 - }, - { - "timestamp_utc": "2025-12-08T21:43:13.580068+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:42:42Z\",\n \"avg_ns\": 4086252867,\n \"stddev_ns\": 625248,\n \"avg_ts\": 125.298170,\n \"stddev_ts\": 0.019072,\n \"samples_ns\": [ 4086306283, 4086846409, 4085605910 ],\n \"samples_ts\": [ 125.297, 125.28, 125.318 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:42:59Z\",\n \"avg_ns\": 4764047482,\n \"stddev_ns\": 395998215,\n \"avg_ts\": 26.986580,\n \"stddev_ts\": 2.141528,\n \"samples_ns\": [ 5220828403, 4517547072, 4553766973 ],\n \"samples_ts\": [ 24.5172, 28.334, 28.1086 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:42:42Z", - "avg_ns": 4086252867, - "stddev_ns": 625248, - "avg_ts": 125.29817, - "stddev_ts": 0.019072, - "samples_ns": [ - 4086306283, - 4086846409, - 4085605910 - ], - "samples_ts": [ - 125.297, - 125.28, - 125.318 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:42:59Z", - "avg_ns": 4764047482, - "stddev_ns": 395998215, - "avg_ts": 26.98658, - "stddev_ts": 2.141528, - "samples_ns": [ - 5220828403, - 4517547072, - 4553766973 - ], - "samples_ts": [ - 24.5172, - 28.334, - 28.1086 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 150 - }, - { - "timestamp_utc": "2025-12-08T21:44:26.934350+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:43:14Z\",\n \"avg_ns\": 4217878132,\n \"stddev_ns\": 254522450,\n \"avg_ts\": 121.673477,\n \"stddev_ts\": 7.095048,\n \"samples_ns\": [ 4071504411, 4511774576, 4070355411 ],\n \"samples_ts\": [ 125.752, 113.481, 125.788 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:43:30Z\",\n \"avg_ns\": 18640692053,\n \"stddev_ns\": 457554472,\n \"avg_ts\": 27.477681,\n \"stddev_ts\": 0.665463,\n \"samples_ns\": [ 19166469045, 18422803477, 18332803638 ],\n \"samples_ts\": [ 26.7133, 27.7916, 27.9281 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:43:14Z", - "avg_ns": 4217878132, - "stddev_ns": 254522450, - "avg_ts": 121.673477, - "stddev_ts": 7.095048, - "samples_ns": [ - 4071504411, - 4511774576, - 4070355411 - ], - "samples_ts": [ - 125.752, - 113.481, - 125.788 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:43:30Z", - "avg_ns": 18640692053, - "stddev_ns": 457554472, - "avg_ts": 27.477681, - "stddev_ts": 0.665463, - "samples_ns": [ - 19166469045, - 18422803477, - 18332803638 - ], - "samples_ts": [ - 26.7133, - 27.7916, - 27.9281 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 151 - }, - { - "timestamp_utc": "2025-12-08T21:44:45.149899+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:44:27Z\",\n \"avg_ns\": 992457546,\n \"stddev_ns\": 165154,\n \"avg_ts\": 128.972774,\n \"stddev_ts\": 0.021463,\n \"samples_ns\": [ 992480818, 992281990, 992609830 ],\n \"samples_ts\": [ 128.97, 128.996, 128.953 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:44:31Z\",\n \"avg_ns\": 4520743498,\n \"stddev_ns\": 49380672,\n \"avg_ts\": 28.316174,\n \"stddev_ts\": 0.308642,\n \"samples_ns\": [ 4573140561, 4514022060, 4475067874 ],\n \"samples_ts\": [ 27.9895, 28.3561, 28.6029 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:44:27Z", - "avg_ns": 992457546, - "stddev_ns": 165154, - "avg_ts": 128.972774, - "stddev_ts": 0.021463, - "samples_ns": [ - 992480818, - 992281990, - 992609830 - ], - "samples_ts": [ - 128.97, - 128.996, - 128.953 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:44:31Z", - "avg_ns": 4520743498, - "stddev_ns": 49380672, - "avg_ts": 28.316174, - "stddev_ts": 0.308642, - "samples_ns": [ - 4573140561, - 4514022060, - 4475067874 - ], - "samples_ts": [ - 27.9895, - 28.3561, - 28.6029 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 152 - }, - { - "timestamp_utc": "2025-12-08T21:45:45.486557+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:44:45Z\",\n \"avg_ns\": 991256536,\n \"stddev_ns\": 3687146,\n \"avg_ts\": 129.130228,\n \"stddev_ts\": 0.481278,\n \"samples_ns\": [ 993019932, 993730565, 987019113 ],\n \"samples_ts\": [ 128.9, 128.808, 129.683 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:44:49Z\",\n \"avg_ns\": 18554085295,\n \"stddev_ns\": 3063074679,\n \"avg_ts\": 27.603400,\n \"stddev_ts\": 0.586111,\n \"samples_ns\": [ 19014226630, 18307986524, 18340042732 ],\n \"samples_ts\": [ 26.9272, 27.9659, 27.9171 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:44:45Z", - "avg_ns": 991256536, - "stddev_ns": 3687146, - "avg_ts": 129.130228, - "stddev_ts": 0.481278, - "samples_ns": [ - 993019932, - 993730565, - 987019113 - ], - "samples_ts": [ - 128.9, - 128.808, - 129.683 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:44:49Z", - "avg_ns": 18554085295, - "stddev_ns": 3063074679, - "avg_ts": 27.6034, - "stddev_ts": 0.586111, - "samples_ns": [ - 19014226630, - 18307986524, - 18340042732 - ], - "samples_ts": [ - 26.9272, - 27.9659, - 27.9171 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 153 - }, - { - "timestamp_utc": "2025-12-08T21:46:18.186812+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:45:46Z\",\n \"avg_ns\": 4397510845,\n \"stddev_ns\": 249463850,\n \"avg_ts\": 116.671907,\n \"stddev_ts\": 6.409644,\n \"samples_ns\": [ 4245976687, 4685434144, 4261121704 ],\n \"samples_ts\": [ 120.585, 109.275, 120.156 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:46:03Z\",\n \"avg_ns\": 4848237462,\n \"stddev_ns\": 17196459,\n \"avg_ts\": 26.401569,\n \"stddev_ts\": 0.093835,\n \"samples_ns\": [ 4858493607, 4857834103, 4828384678 ],\n \"samples_ts\": [ 26.3456, 26.3492, 26.5099 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:45:46Z", - "avg_ns": 4397510845, - "stddev_ns": 249463850, - "avg_ts": 116.671907, - "stddev_ts": 6.409644, - "samples_ns": [ - 4245976687, - 4685434144, - 4261121704 - ], - "samples_ts": [ - 120.585, - 109.275, - 120.156 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:46:03Z", - "avg_ns": 4848237462, - "stddev_ns": 17196459, - "avg_ts": 26.401569, - "stddev_ts": 0.093835, - "samples_ns": [ - 4858493607, - 4857834103, - 4828384678 - ], - "samples_ts": [ - 26.3456, - 26.3492, - 26.5099 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 154 - }, - { - "timestamp_utc": "2025-12-08T21:47:32.242642+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:46:18Z\",\n \"avg_ns\": 4254952396,\n \"stddev_ns\": 10673091,\n \"avg_ts\": 120.330875,\n \"stddev_ts\": 0.301495,\n \"samples_ns\": [ 4251301533, 4246583796, 4266971859 ],\n \"samples_ts\": [ 120.434, 120.568, 119.991 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:46:36Z\",\n \"avg_ns\": 18620284032,\n \"stddev_ns\": 379243439,\n \"avg_ts\": 27.504417,\n \"stddev_ts\": 0.553968,\n \"samples_ns\": [ 19056062368, 18439791382, 18364998347 ],\n \"samples_ts\": [ 26.8681, 27.766, 27.8791 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:46:18Z", - "avg_ns": 4254952396, - "stddev_ns": 10673091, - "avg_ts": 120.330875, - "stddev_ts": 0.301495, - "samples_ns": [ - 4251301533, - 4246583796, - 4266971859 - ], - "samples_ts": [ - 120.434, - 120.568, - 119.991 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:46:36Z", - "avg_ns": 18620284032, - "stddev_ns": 379243439, - "avg_ts": 27.504417, - "stddev_ts": 0.553968, - "samples_ns": [ - 19056062368, - 18439791382, - 18364998347 - ], - "samples_ts": [ - 26.8681, - 27.766, - 27.8791 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 155 - }, - { - "timestamp_utc": "2025-12-08T21:47:51.105243+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:47:32Z\",\n \"avg_ns\": 990068434,\n \"stddev_ns\": 1613738,\n \"avg_ts\": 129.284222,\n \"stddev_ts\": 0.210849,\n \"samples_ns\": [ 991293932, 990670913, 988240458 ],\n \"samples_ts\": [ 129.124, 129.205, 129.523 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:47:36Z\",\n \"avg_ns\": 4727261380,\n \"stddev_ns\": 361506748,\n \"avg_ts\": 27.178606,\n \"stddev_ts\": 1.993348,\n \"samples_ns\": [ 5143331727, 4490050528, 4548401887 ],\n \"samples_ts\": [ 24.8866, 28.5075, 28.1418 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:47:32Z", - "avg_ns": 990068434, - "stddev_ns": 1613738, - "avg_ts": 129.284222, - "stddev_ts": 0.210849, - "samples_ns": [ - 991293932, - 990670913, - 988240458 - ], - "samples_ts": [ - 129.124, - 129.205, - 129.523 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:47:36Z", - "avg_ns": 4727261380, - "stddev_ns": 361506748, - "avg_ts": 27.178606, - "stddev_ts": 1.993348, - "samples_ns": [ - 5143331727, - 4490050528, - 4548401887 - ], - "samples_ts": [ - 24.8866, - 28.5075, - 28.1418 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 156 - }, - { - "timestamp_utc": "2025-12-08T21:48:51.258705+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:47:51Z\",\n \"avg_ns\": 990262479,\n \"stddev_ns\": 1381677,\n \"avg_ts\": 129.258827,\n \"stddev_ts\": 0.180317,\n \"samples_ns\": [ 991619357, 988857952, 990310129 ],\n \"samples_ts\": [ 129.082, 129.442, 129.252 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:47:55Z\",\n \"avg_ns\": 18496234806,\n \"stddev_ns\": 89605302,\n \"avg_ts\": 27.681743,\n \"stddev_ts\": 0.134365,\n \"samples_ns\": [ 18569972118, 18522224855, 18396507445 ],\n \"samples_ts\": [ 27.5714, 27.6425, 27.8314 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:47:51Z", - "avg_ns": 990262479, - "stddev_ns": 1381677, - "avg_ts": 129.258827, - "stddev_ts": 0.180317, - "samples_ns": [ - 991619357, - 988857952, - 990310129 - ], - "samples_ts": [ - 129.082, - 129.442, - 129.252 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:47:55Z", - "avg_ns": 18496234806, - "stddev_ns": 89605302, - "avg_ts": 27.681743, - "stddev_ts": 0.134365, - "samples_ns": [ - 18569972118, - 18522224855, - 18396507445 - ], - "samples_ts": [ - 27.5714, - 27.6425, - 27.8314 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 157 - }, - { - "timestamp_utc": "2025-12-08T21:49:22.131342+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:48:51Z\",\n \"avg_ns\": 4227170487,\n \"stddev_ns\": 261673108,\n \"avg_ts\": 121.420792,\n \"stddev_ts\": 7.259472,\n \"samples_ns\": [ 4064423308, 4088072257, 4529015898 ],\n \"samples_ts\": [ 125.971, 125.242, 113.049 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:49:08Z\",\n \"avg_ns\": 4476731998,\n \"stddev_ns\": 17313626,\n \"avg_ts\": 28.592571,\n \"stddev_ts\": 0.110689,\n \"samples_ns\": [ 4492353653, 4458116792, 4479725549 ],\n \"samples_ts\": [ 28.4929, 28.7117, 28.5732 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:48:51Z", - "avg_ns": 4227170487, - "stddev_ns": 261673108, - "avg_ts": 121.420792, - "stddev_ts": 7.259472, - "samples_ns": [ - 4064423308, - 4088072257, - 4529015898 - ], - "samples_ts": [ - 125.971, - 125.242, - 113.049 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:49:08Z", - "avg_ns": 4476731998, - "stddev_ns": 17313626, - "avg_ts": 28.592571, - "stddev_ts": 0.110689, - "samples_ns": [ - 4492353653, - 4458116792, - 4479725549 - ], - "samples_ts": [ - 28.4929, - 28.7117, - 28.5732 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 158 - }, - { - "timestamp_utc": "2025-12-08T21:50:35.246565+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:49:22Z\",\n \"avg_ns\": 4080021160,\n \"stddev_ns\": 10631483,\n \"avg_ts\": 125.490112,\n \"stddev_ts\": 0.326543,\n \"samples_ns\": [ 4072327929, 4075583398, 4092152155 ],\n \"samples_ts\": [ 125.727, 125.626, 125.118 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:49:39Z\",\n \"avg_ns\": 18690826970,\n \"stddev_ns\": 380262909,\n \"avg_ts\": 27.400590,\n \"stddev_ts\": 0.550995,\n \"samples_ns\": [ 18466372479, 19129879882, 18476228549 ],\n \"samples_ts\": [ 27.7261, 26.7644, 27.7113 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:49:22Z", - "avg_ns": 4080021160, - "stddev_ns": 10631483, - "avg_ts": 125.490112, - "stddev_ts": 0.326543, - "samples_ns": [ - 4072327929, - 4075583398, - 4092152155 - ], - "samples_ts": [ - 125.727, - 125.626, - 125.118 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:49:39Z", - "avg_ns": 18690826970, - "stddev_ns": 380262909, - "avg_ts": 27.40059, - "stddev_ts": 0.550995, - "samples_ns": [ - 18466372479, - 19129879882, - 18476228549 - ], - "samples_ts": [ - 27.7261, - 26.7644, - 27.7113 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 159 - }, - { - "timestamp_utc": "2025-12-08T21:50:54.009058+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:50:35Z\",\n \"avg_ns\": 988065624,\n \"stddev_ns\": 1265457,\n \"avg_ts\": 129.546193,\n \"stddev_ts\": 0.165791,\n \"samples_ns\": [ 989456475, 987756910, 986983488 ],\n \"samples_ts\": [ 129.364, 129.587, 129.688 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:50:39Z\",\n \"avg_ns\": 4698087142,\n \"stddev_ns\": 369479415,\n \"avg_ts\": 27.353019,\n \"stddev_ts\": 2.057756,\n \"samples_ns\": [ 4482331300, 5124715901, 4487214226 ],\n \"samples_ts\": [ 28.5566, 24.977, 28.5255 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:50:35Z", - "avg_ns": 988065624, - "stddev_ns": 1265457, - "avg_ts": 129.546193, - "stddev_ts": 0.165791, - "samples_ns": [ - 989456475, - 987756910, - 986983488 - ], - "samples_ts": [ - 129.364, - 129.587, - 129.688 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:50:39Z", - "avg_ns": 4698087142, - "stddev_ns": 369479415, - "avg_ts": 27.353019, - "stddev_ts": 2.057756, - "samples_ns": [ - 4482331300, - 5124715901, - 4487214226 - ], - "samples_ts": [ - 28.5566, - 24.977, - 28.5255 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 160 - }, - { - "timestamp_utc": "2025-12-08T21:51:54.567194+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:50:54Z\",\n \"avg_ns\": 1138433591,\n \"stddev_ns\": 247901389,\n \"avg_ts\": 115.684635,\n \"stddev_ts\": 22.384379,\n \"samples_ns\": [ 990365296, 1000307578, 1424627899 ],\n \"samples_ts\": [ 129.245, 127.961, 89.848 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:50:59Z\",\n \"avg_ns\": 18461455556,\n \"stddev_ns\": 47512891,\n \"avg_ts\": 27.733581,\n \"stddev_ts\": 0.071470,\n \"samples_ns\": [ 18480849097, 18407313394, 18496204177 ],\n \"samples_ts\": [ 27.7044, 27.815, 27.6814 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:50:54Z", - "avg_ns": 1138433591, - "stddev_ns": 247901389, - "avg_ts": 115.684635, - "stddev_ts": 22.384379, - "samples_ns": [ - 990365296, - 1000307578, - 1424627899 - ], - "samples_ts": [ - 129.245, - 127.961, - 89.848 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:50:59Z", - "avg_ns": 18461455556, - "stddev_ns": 47512891, - "avg_ts": 27.733581, - "stddev_ts": 0.07147, - "samples_ns": [ - 18480849097, - 18407313394, - 18496204177 - ], - "samples_ts": [ - 27.7044, - 27.815, - 27.6814 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 161 - }, - { - "timestamp_utc": "2025-12-08T21:52:25.476635+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:51:55Z\",\n \"avg_ns\": 4217318536,\n \"stddev_ns\": 247033247,\n \"avg_ts\": 121.673380,\n \"stddev_ts\": 6.894230,\n \"samples_ns\": [ 4078379610, 4502536469, 4071039530 ],\n \"samples_ts\": [ 125.54, 113.714, 125.766 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:52:11Z\",\n \"avg_ns\": 4495041086,\n \"stddev_ns\": 45077436,\n \"avg_ts\": 28.477727,\n \"stddev_ts\": 0.284594,\n \"samples_ns\": [ 4456605686, 4483861316, 4544656258 ],\n \"samples_ts\": [ 28.7214, 28.5468, 28.1649 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:51:55Z", - "avg_ns": 4217318536, - "stddev_ns": 247033247, - "avg_ts": 121.67338, - "stddev_ts": 6.89423, - "samples_ns": [ - 4078379610, - 4502536469, - 4071039530 - ], - "samples_ts": [ - 125.54, - 113.714, - 125.766 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:52:11Z", - "avg_ns": 4495041086, - "stddev_ns": 45077436, - "avg_ts": 28.477727, - "stddev_ts": 0.284594, - "samples_ns": [ - 4456605686, - 4483861316, - 4544656258 - ], - "samples_ts": [ - 28.7214, - 28.5468, - 28.1649 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 162 - }, - { - "timestamp_utc": "2025-12-08T21:53:43.868931+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:52:26Z\",\n \"avg_ns\": 4233757106,\n \"stddev_ns\": 133380711,\n \"avg_ts\": 121.014274,\n \"stddev_ts\": 3.880397,\n \"samples_ns\": [ 4080418383, 4297943179, 4322909758 ],\n \"samples_ts\": [ 125.477, 119.127, 118.439 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:52:42Z\",\n \"avg_ns\": 20299047242,\n \"stddev_ns\": 317760888,\n \"avg_ts\": 25.227014,\n \"stddev_ts\": 0.398159,\n \"samples_ns\": [ 19936170480, 20527520969, 20433450277 ],\n \"samples_ts\": [ 25.682, 24.9421, 25.057 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:52:26Z", - "avg_ns": 4233757106, - "stddev_ns": 133380711, - "avg_ts": 121.014274, - "stddev_ts": 3.880397, - "samples_ns": [ - 4080418383, - 4297943179, - 4322909758 - ], - "samples_ts": [ - 125.477, - 119.127, - 118.439 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:52:42Z", - "avg_ns": 20299047242, - "stddev_ns": 317760888, - "avg_ts": 25.227014, - "stddev_ts": 0.398159, - "samples_ns": [ - 19936170480, - 20527520969, - 20433450277 - ], - "samples_ts": [ - 25.682, - 24.9421, - 25.057 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 163 - }, - { - "timestamp_utc": "2025-12-08T21:54:01.988919+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:53:44Z\",\n \"avg_ns\": 990486913,\n \"stddev_ns\": 3921874,\n \"avg_ts\": 129.230718,\n \"stddev_ts\": 0.510531,\n \"samples_ns\": [ 995006554, 988471523, 987982663 ],\n \"samples_ts\": [ 128.642, 129.493, 129.557 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:53:48Z\",\n \"avg_ns\": 4478731012,\n \"stddev_ns\": 29761656,\n \"avg_ts\": 28.580367,\n \"stddev_ts\": 0.190220,\n \"samples_ns\": [ 4505804831, 4446863583, 4483524623 ],\n \"samples_ts\": [ 28.4078, 28.7843, 28.549 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:53:44Z", - "avg_ns": 990486913, - "stddev_ns": 3921874, - "avg_ts": 129.230718, - "stddev_ts": 0.510531, - "samples_ns": [ - 995006554, - 988471523, - 987982663 - ], - "samples_ts": [ - 128.642, - 129.493, - 129.557 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:53:48Z", - "avg_ns": 4478731012, - "stddev_ns": 29761656, - "avg_ts": 28.580367, - "stddev_ts": 0.19022, - "samples_ns": [ - 4505804831, - 4446863583, - 4483524623 - ], - "samples_ts": [ - 28.4078, - 28.7843, - 28.549 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 164 - }, - { - "timestamp_utc": "2025-12-08T21:55:01.754088+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:54:02Z\",\n \"avg_ns\": 992467405,\n \"stddev_ns\": 3977079,\n \"avg_ts\": 128.972868,\n \"stddev_ts\": 0.515795,\n \"samples_ns\": [ 990895033, 996990287, 989516895 ],\n \"samples_ts\": [ 129.176, 128.386, 129.356 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:54:06Z\",\n \"avg_ns\": 18345583666,\n \"stddev_ns\": 41746731,\n \"avg_ts\": 27.908720,\n \"stddev_ts\": 0.063426,\n \"samples_ns\": [ 18323466877, 18319548841, 18393735281 ],\n \"samples_ts\": [ 27.9423, 27.9483, 27.8356 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:54:02Z", - "avg_ns": 992467405, - "stddev_ns": 3977079, - "avg_ts": 128.972868, - "stddev_ts": 0.515795, - "samples_ns": [ - 990895033, - 996990287, - 989516895 - ], - "samples_ts": [ - 129.176, - 128.386, - 129.356 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:54:06Z", - "avg_ns": 18345583666, - "stddev_ns": 41746731, - "avg_ts": 27.90872, - "stddev_ts": 0.063426, - "samples_ns": [ - 18323466877, - 18319548841, - 18393735281 - ], - "samples_ts": [ - 27.9423, - 27.9483, - 27.8356 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 165 - }, - { - "timestamp_utc": "2025-12-08T21:55:33.138222+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:55:02Z\",\n \"avg_ns\": 4247850095,\n \"stddev_ns\": 6032759,\n \"avg_ts\": 120.531722,\n \"stddev_ts\": 0.171064,\n \"samples_ns\": [ 4245672481, 4243208484, 4254669320 ],\n \"samples_ts\": [ 120.593, 120.663, 120.338 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:55:19Z\",\n \"avg_ns\": 4539923173,\n \"stddev_ns\": 11930775,\n \"avg_ts\": 28.194439,\n \"stddev_ts\": 0.073995,\n \"samples_ns\": [ 4553499189, 4531108417, 4535161914 ],\n \"samples_ts\": [ 28.1102, 28.2492, 28.2239 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:55:02Z", - "avg_ns": 4247850095, - "stddev_ns": 6032759, - "avg_ts": 120.531722, - "stddev_ts": 0.171064, - "samples_ns": [ - 4245672481, - 4243208484, - 4254669320 - ], - "samples_ts": [ - 120.593, - 120.663, - 120.338 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:55:19Z", - "avg_ns": 4539923173, - "stddev_ns": 11930775, - "avg_ts": 28.194439, - "stddev_ts": 0.073995, - "samples_ns": [ - 4553499189, - 4531108417, - 4535161914 - ], - "samples_ts": [ - 28.1102, - 28.2492, - 28.2239 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 166 - }, - { - "timestamp_utc": "2025-12-08T21:56:46.478903+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:55:33Z\",\n \"avg_ns\": 4260468379,\n \"stddev_ns\": 8168527,\n \"avg_ts\": 120.174875,\n \"stddev_ts\": 0.230338,\n \"samples_ns\": [ 4259663024, 4252732621, 4269009493 ],\n \"samples_ts\": [ 120.197, 120.393, 119.934 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:55:50Z\",\n \"avg_ns\": 18524957719,\n \"stddev_ns\": 117849135,\n \"avg_ts\": 27.639134,\n \"stddev_ts\": 0.175471,\n \"samples_ns\": [ 18653911828, 18498117709, 18422843620 ],\n \"samples_ts\": [ 27.4473, 27.6785, 27.7916 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:55:33Z", - "avg_ns": 4260468379, - "stddev_ns": 8168527, - "avg_ts": 120.174875, - "stddev_ts": 0.230338, - "samples_ns": [ - 4259663024, - 4252732621, - 4269009493 - ], - "samples_ts": [ - 120.197, - 120.393, - 119.934 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:55:50Z", - "avg_ns": 18524957719, - "stddev_ns": 117849135, - "avg_ts": 27.639134, - "stddev_ts": 0.175471, - "samples_ns": [ - 18653911828, - 18498117709, - 18422843620 - ], - "samples_ts": [ - 27.4473, - 27.6785, - 27.7916 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 167 - }, - { - "timestamp_utc": "2025-12-08T21:57:06.154054+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:56:47Z\",\n \"avg_ns\": 991683157,\n \"stddev_ns\": 2701772,\n \"avg_ts\": 129.074123,\n \"stddev_ts\": 0.351776,\n \"samples_ns\": [ 991913247, 994262526, 988873698 ],\n \"samples_ts\": [ 129.044, 128.739, 129.44 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:56:51Z\",\n \"avg_ns\": 4990730230,\n \"stddev_ns\": 33410669,\n \"avg_ts\": 25.648313,\n \"stddev_ts\": 0.171137,\n \"samples_ns\": [ 4965771209, 4977733217, 5028686265 ],\n \"samples_ts\": [ 25.7765, 25.7145, 25.454 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:56:47Z", - "avg_ns": 991683157, - "stddev_ns": 2701772, - "avg_ts": 129.074123, - "stddev_ts": 0.351776, - "samples_ns": [ - 991913247, - 994262526, - 988873698 - ], - "samples_ts": [ - 129.044, - 128.739, - 129.44 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:56:51Z", - "avg_ns": 4990730230, - "stddev_ns": 33410669, - "avg_ts": 25.648313, - "stddev_ts": 0.171137, - "samples_ns": [ - 4965771209, - 4977733217, - 5028686265 - ], - "samples_ts": [ - 25.7765, - 25.7145, - 25.454 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 168 - }, - { - "timestamp_utc": "2025-12-08T21:58:06.912513+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:57:06Z\",\n \"avg_ns\": 990750330,\n \"stddev_ns\": 1654783,\n \"avg_ts\": 129.195251,\n \"stddev_ts\": 0.215794,\n \"samples_ns\": [ 991020426, 988977720, 992252846 ],\n \"samples_ts\": [ 129.16, 129.427, 128.999 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:57:10Z\",\n \"avg_ns\": 18686917356,\n \"stddev_ns\": 596369248,\n \"avg_ts\": 27.417129,\n \"stddev_ts\": 0.859243,\n \"samples_ns\": [ 19375123617, 18321948738, 18363679714 ],\n \"samples_ts\": [ 26.4256, 27.9446, 27.8811 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:57:06Z", - "avg_ns": 990750330, - "stddev_ns": 1654783, - "avg_ts": 129.195251, - "stddev_ts": 0.215794, - "samples_ns": [ - 991020426, - 988977720, - 992252846 - ], - "samples_ts": [ - 129.16, - 129.427, - 128.999 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:57:10Z", - "avg_ns": 18686917356, - "stddev_ns": 596369248, - "avg_ts": 27.417129, - "stddev_ts": 0.859243, - "samples_ns": [ - 19375123617, - 18321948738, - 18363679714 - ], - "samples_ts": [ - 26.4256, - 27.9446, - 27.8811 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 169 - }, - { - "timestamp_utc": "2025-12-08T21:58:38.019494+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:58:07Z\",\n \"avg_ns\": 4080458484,\n \"stddev_ns\": 19352060,\n \"avg_ts\": 125.477977,\n \"stddev_ts\": 0.594847,\n \"samples_ns\": [ 4079308890, 4100359610, 4061706953 ],\n \"samples_ts\": [ 125.511, 124.867, 126.055 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:58:23Z\",\n \"avg_ns\": 4696428309,\n \"stddev_ns\": 354113382,\n \"avg_ts\": 27.354107,\n \"stddev_ts\": 1.976493,\n \"samples_ns\": [ 4494357442, 5105314049, 4489613436 ],\n \"samples_ts\": [ 28.4802, 25.0719, 28.5102 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:58:07Z", - "avg_ns": 4080458484, - "stddev_ns": 19352060, - "avg_ts": 125.477977, - "stddev_ts": 0.594847, - "samples_ns": [ - 4079308890, - 4100359610, - 4061706953 - ], - "samples_ts": [ - 125.511, - 124.867, - 126.055 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:58:23Z", - "avg_ns": 4696428309, - "stddev_ns": 354113382, - "avg_ts": 27.354107, - "stddev_ts": 1.976493, - "samples_ns": [ - 4494357442, - 5105314049, - 4489613436 - ], - "samples_ts": [ - 28.4802, - 25.0719, - 28.5102 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 170 - }, - { - "timestamp_utc": "2025-12-08T21:59:52.809160+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:58:38Z\",\n \"avg_ns\": 4068902659,\n \"stddev_ns\": 3281925,\n \"avg_ts\": 125.832507,\n \"stddev_ts\": 0.101504,\n \"samples_ns\": [ 4070699474, 4070892416, 4065116089 ],\n \"samples_ts\": [ 125.777, 125.771, 125.95 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:58:55Z\",\n \"avg_ns\": 19108939497,\n \"stddev_ns\": 373638994,\n \"avg_ts\": 26.800620,\n \"stddev_ts\": 0.527624,\n \"samples_ns\": [ 19199298212, 18698407494, 19429112786 ],\n \"samples_ts\": [ 26.6676, 27.382, 26.3522 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:58:38Z", - "avg_ns": 4068902659, - "stddev_ns": 3281925, - "avg_ts": 125.832507, - "stddev_ts": 0.101504, - "samples_ns": [ - 4070699474, - 4070892416, - 4065116089 - ], - "samples_ts": [ - 125.777, - 125.771, - 125.95 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T21:58:55Z", - "avg_ns": 19108939497, - "stddev_ns": 373638994, - "avg_ts": 26.80062, - "stddev_ts": 0.527624, - "samples_ns": [ - 19199298212, - 18698407494, - 19429112786 - ], - "samples_ts": [ - 26.6676, - 27.382, - 26.3522 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 171 - }, - { - "timestamp_utc": "2025-12-08T22:00:12.423562+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:59:53Z\",\n \"avg_ns\": 987757609,\n \"stddev_ns\": 1727874,\n \"avg_ts\": 129.586712,\n \"stddev_ts\": 0.226828,\n \"samples_ns\": [ 988351120, 985811499, 989110209 ],\n \"samples_ts\": [ 129.509, 129.842, 129.409 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:59:57Z\",\n \"avg_ns\": 4985937192,\n \"stddev_ns\": 9817185,\n \"avg_ts\": 25.672271,\n \"stddev_ts\": 0.050604,\n \"samples_ns\": [ 4992123725, 4974617915, 4991069937 ],\n \"samples_ts\": [ 25.6404, 25.7306, 25.6458 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T21:59:53Z", - "avg_ns": 987757609, - "stddev_ns": 1727874, - "avg_ts": 129.586712, - "stddev_ts": 0.226828, - "samples_ns": [ - 988351120, - 985811499, - 989110209 - ], - "samples_ts": [ - 129.509, - 129.842, - 129.409 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T21:59:57Z", - "avg_ns": 4985937192, - "stddev_ns": 9817185, - "avg_ts": 25.672271, - "stddev_ts": 0.050604, - "samples_ns": [ - 4992123725, - 4974617915, - 4991069937 - ], - "samples_ts": [ - 25.6404, - 25.7306, - 25.6458 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 172 - }, - { - "timestamp_utc": "2025-12-08T22:01:12.966766+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:00:13Z\",\n \"avg_ns\": 995528362,\n \"stddev_ns\": 108860,\n \"avg_ts\": 128.574942,\n \"stddev_ts\": 0.014059,\n \"samples_ns\": [ 995653745, 995473406, 995457935 ],\n \"samples_ts\": [ 128.559, 128.582, 128.584 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:00:17Z\",\n \"avg_ns\": 18614559063,\n \"stddev_ns\": 342378384,\n \"avg_ts\": 27.511517,\n \"stddev_ts\": 0.502815,\n \"samples_ns\": [ 18528726099, 18991687282, 18323263808 ],\n \"samples_ts\": [ 27.6328, 26.9592, 27.9426 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:00:13Z", - "avg_ns": 995528362, - "stddev_ns": 108860, - "avg_ts": 128.574942, - "stddev_ts": 0.014059, - "samples_ns": [ - 995653745, - 995473406, - 995457935 - ], - "samples_ts": [ - 128.559, - 128.582, - 128.584 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:00:17Z", - "avg_ns": 18614559063, - "stddev_ns": 342378384, - "avg_ts": 27.511517, - "stddev_ts": 0.502815, - "samples_ns": [ - 18528726099, - 18991687282, - 18323263808 - ], - "samples_ts": [ - 27.6328, - 26.9592, - 27.9426 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 173 - }, - { - "timestamp_utc": "2025-12-08T22:01:43.363773+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:01:13Z\",\n \"avg_ns\": 4064320101,\n \"stddev_ns\": 3083866,\n \"avg_ts\": 125.974378,\n \"stddev_ts\": 0.095586,\n \"samples_ns\": [ 4060760759, 4066120100, 4066079446 ],\n \"samples_ts\": [ 126.085, 125.919, 125.92 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:01:29Z\",\n \"avg_ns\": 4484813587,\n \"stddev_ns\": 14227647,\n \"avg_ts\": 28.540954,\n \"stddev_ts\": 0.090695,\n \"samples_ns\": [ 4494924542, 4490972047, 4468544172 ],\n \"samples_ts\": [ 28.4766, 28.5016, 28.6447 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:01:13Z", - "avg_ns": 4064320101, - "stddev_ns": 3083866, - "avg_ts": 125.974378, - "stddev_ts": 0.095586, - "samples_ns": [ - 4060760759, - 4066120100, - 4066079446 - ], - "samples_ts": [ - 126.085, - 125.919, - 125.92 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:01:29Z", - "avg_ns": 4484813587, - "stddev_ns": 14227647, - "avg_ts": 28.540954, - "stddev_ts": 0.090695, - "samples_ns": [ - 4494924542, - 4490972047, - 4468544172 - ], - "samples_ts": [ - 28.4766, - 28.5016, - 28.6447 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 174 - }, - { - "timestamp_utc": "2025-12-08T22:02:59.826574+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:01:43Z\",\n \"avg_ns\": 4055325639,\n \"stddev_ns\": 9086677,\n \"avg_ts\": 126.254155,\n \"stddev_ts\": 0.282523,\n \"samples_ns\": [ 4049988936, 4050170734, 4065817248 ],\n \"samples_ts\": [ 126.42, 126.414, 125.928 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:02:00Z\",\n \"avg_ns\": 19835755012,\n \"stddev_ns\": 108456066,\n \"avg_ts\": 25.812489,\n \"stddev_ts\": 0.141108,\n \"samples_ns\": [ 19728672782, 19833057659, 19945534595 ],\n \"samples_ts\": [ 25.9521, 25.8155, 25.6699 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:01:43Z", - "avg_ns": 4055325639, - "stddev_ns": 9086677, - "avg_ts": 126.254155, - "stddev_ts": 0.282523, - "samples_ns": [ - 4049988936, - 4050170734, - 4065817248 - ], - "samples_ts": [ - 126.42, - 126.414, - 125.928 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:02:00Z", - "avg_ns": 19835755012, - "stddev_ns": 108456066, - "avg_ts": 25.812489, - "stddev_ts": 0.141108, - "samples_ns": [ - 19728672782, - 19833057659, - 19945534595 - ], - "samples_ts": [ - 25.9521, - 25.8155, - 25.6699 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 175 - }, - { - "timestamp_utc": "2025-12-08T22:03:20.014380+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:03:00Z\",\n \"avg_ns\": 994815326,\n \"stddev_ns\": 2869471,\n \"avg_ts\": 128.667811,\n \"stddev_ts\": 0.371511,\n \"samples_ns\": [ 995637042, 991624997, 997183941 ],\n \"samples_ts\": [ 128.561, 129.081, 128.361 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:03:04Z\",\n \"avg_ns\": 5166053292,\n \"stddev_ns\": 374677890,\n \"avg_ts\": 24.860814,\n \"stddev_ts\": 1.730649,\n \"samples_ns\": [ 4945864086, 4953624957, 5598670835 ],\n \"samples_ts\": [ 25.8802, 25.8397, 22.8626 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:03:00Z", - "avg_ns": 994815326, - "stddev_ns": 2869471, - "avg_ts": 128.667811, - "stddev_ts": 0.371511, - "samples_ns": [ - 995637042, - 991624997, - 997183941 - ], - "samples_ts": [ - 128.561, - 129.081, - 128.361 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:03:04Z", - "avg_ns": 5166053292, - "stddev_ns": 374677890, - "avg_ts": 24.860814, - "stddev_ts": 1.730649, - "samples_ns": [ - 4945864086, - 4953624957, - 5598670835 - ], - "samples_ts": [ - 25.8802, - 25.8397, - 22.8626 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 176 - }, - { - "timestamp_utc": "2025-12-08T22:04:21.116564+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:03:20Z\",\n \"avg_ns\": 985646090,\n \"stddev_ns\": 1006512,\n \"avg_ts\": 129.864147,\n \"stddev_ts\": 0.132591,\n \"samples_ns\": [ 985533107, 984700836, 986704327 ],\n \"samples_ts\": [ 129.879, 129.989, 129.725 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:03:24Z\",\n \"avg_ns\": 18810078414,\n \"stddev_ns\": 373935388,\n \"avg_ts\": 27.226707,\n \"stddev_ts\": 0.547498,\n \"samples_ns\": [ 19040141773, 18378612203, 19011481266 ],\n \"samples_ts\": [ 26.8906, 27.8585, 26.9311 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:03:20Z", - "avg_ns": 985646090, - "stddev_ns": 1006512, - "avg_ts": 129.864147, - "stddev_ts": 0.132591, - "samples_ns": [ - 985533107, - 984700836, - 986704327 - ], - "samples_ts": [ - 129.879, - 129.989, - 129.725 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:03:24Z", - "avg_ns": 18810078414, - "stddev_ns": 373935388, - "avg_ts": 27.226707, - "stddev_ts": 0.547498, - "samples_ns": [ - 19040141773, - 18378612203, - 19011481266 - ], - "samples_ts": [ - 26.8906, - 27.8585, - 26.9311 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 177 - }, - { - "timestamp_utc": "2025-12-08T22:04:52.383807+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:04:21Z\",\n \"avg_ns\": 4256997797,\n \"stddev_ns\": 9064113,\n \"avg_ts\": 120.272918,\n \"stddev_ts\": 0.256008,\n \"samples_ns\": [ 4256091033, 4248421146, 4266481212 ],\n \"samples_ts\": [ 120.298, 120.515, 120.005 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:04:38Z\",\n \"avg_ns\": 4507838337,\n \"stddev_ns\": 28928468,\n \"avg_ts\": 28.395764,\n \"stddev_ts\": 0.182094,\n \"samples_ns\": [ 4537831068, 4505577094, 4480106849 ],\n \"samples_ts\": [ 28.2073, 28.4092, 28.5707 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:04:21Z", - "avg_ns": 4256997797, - "stddev_ns": 9064113, - "avg_ts": 120.272918, - "stddev_ts": 0.256008, - "samples_ns": [ - 4256091033, - 4248421146, - 4266481212 - ], - "samples_ts": [ - 120.298, - 120.515, - 120.005 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:04:38Z", - "avg_ns": 4507838337, - "stddev_ns": 28928468, - "avg_ts": 28.395764, - "stddev_ts": 0.182094, - "samples_ns": [ - 4537831068, - 4505577094, - 4480106849 - ], - "samples_ts": [ - 28.2073, - 28.4092, - 28.5707 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 178 - }, - { - "timestamp_utc": "2025-12-08T22:06:05.662682+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:04:53Z\",\n \"avg_ns\": 4264136118,\n \"stddev_ns\": 9295088,\n \"avg_ts\": 120.071594,\n \"stddev_ts\": 0.261657,\n \"samples_ns\": [ 4255265081, 4273803551, 4263339723 ],\n \"samples_ts\": [ 120.322, 119.8, 120.094 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:05:10Z\",\n \"avg_ns\": 18497440796,\n \"stddev_ns\": 64442258,\n \"avg_ts\": 27.679729,\n \"stddev_ts\": 0.096424,\n \"samples_ns\": [ 18562444191, 18433574729, 18496303468 ],\n \"samples_ts\": [ 27.5826, 27.7754, 27.6812 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:04:53Z", - "avg_ns": 4264136118, - "stddev_ns": 9295088, - "avg_ts": 120.071594, - "stddev_ts": 0.261657, - "samples_ns": [ - 4255265081, - 4273803551, - 4263339723 - ], - "samples_ts": [ - 120.322, - 119.8, - 120.094 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:05:10Z", - "avg_ns": 18497440796, - "stddev_ns": 64442258, - "avg_ts": 27.679729, - "stddev_ts": 0.096424, - "samples_ns": [ - 18562444191, - 18433574729, - 18496303468 - ], - "samples_ts": [ - 27.5826, - 27.7754, - 27.6812 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 179 - }, - { - "timestamp_utc": "2025-12-08T22:06:16.200888+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:06:06Z\",\n \"avg_ns\": 522332810,\n \"stddev_ns\": 2610886,\n \"avg_ts\": 245.058559,\n \"stddev_ts\": 1.221536,\n \"samples_ns\": [ 521148196, 520524452, 525325784 ],\n \"samples_ts\": [ 245.612, 245.906, 243.658 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:06:08Z\",\n \"avg_ns\": 2582138987,\n \"stddev_ns\": 5941397,\n \"avg_ts\": 49.571480,\n \"stddev_ts\": 0.114153,\n \"samples_ns\": [ 2575557231, 2587104798, 2583754934 ],\n \"samples_ts\": [ 49.698, 49.4762, 49.5403 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:06:06Z", - "avg_ns": 522332810, - "stddev_ns": 2610886, - "avg_ts": 245.058559, - "stddev_ts": 1.221536, - "samples_ns": [ - 521148196, - 520524452, - 525325784 - ], - "samples_ts": [ - 245.612, - 245.906, - 243.658 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:06:08Z", - "avg_ns": 2582138987, - "stddev_ns": 5941397, - "avg_ts": 49.57148, - "stddev_ts": 0.114153, - "samples_ns": [ - 2575557231, - 2587104798, - 2583754934 - ], - "samples_ts": [ - 49.698, - 49.4762, - 49.5403 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 180 - }, - { - "timestamp_utc": "2025-12-08T22:06:51.069316+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:06:16Z\",\n \"avg_ns\": 523769919,\n \"stddev_ns\": 808945,\n \"avg_ts\": 244.382503,\n \"stddev_ts\": 0.377716,\n \"samples_ns\": [ 522855452, 524062214, 524392091 ],\n \"samples_ts\": [ 244.81, 244.246, 244.092 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:06:18Z\",\n \"avg_ns\": 10691751347,\n \"stddev_ns\": 31813669,\n \"avg_ts\": 47.887667,\n \"stddev_ts\": 0.142569,\n \"samples_ns\": [ 10695658899, 10721430747, 10658164395 ],\n \"samples_ts\": [ 47.8699, 47.7548, 48.0383 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:06:16Z", - "avg_ns": 523769919, - "stddev_ns": 808945, - "avg_ts": 244.382503, - "stddev_ts": 0.377716, - "samples_ns": [ - 522855452, - 524062214, - 524392091 - ], - "samples_ts": [ - 244.81, - 244.246, - 244.092 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:06:18Z", - "avg_ns": 10691751347, - "stddev_ns": 31813669, - "avg_ts": 47.887667, - "stddev_ts": 0.142569, - "samples_ns": [ - 10695658899, - 10721430747, - 10658164395 - ], - "samples_ts": [ - 47.8699, - 47.7548, - 48.0383 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 181 - }, - { - "timestamp_utc": "2025-12-08T22:07:08.691092+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:06:51Z\",\n \"avg_ns\": 2126626660,\n \"stddev_ns\": 5119544,\n \"avg_ts\": 240.757811,\n \"stddev_ts\": 0.579875,\n \"samples_ns\": [ 2121165851, 2131317238, 2127396892 ],\n \"samples_ts\": [ 241.377, 240.227, 240.67 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:07:00Z\",\n \"avg_ns\": 2806431473,\n \"stddev_ns\": 10212573,\n \"avg_ts\": 45.609926,\n \"stddev_ts\": 0.166159,\n \"samples_ns\": [ 2815347294, 2795289840, 2808657287 ],\n \"samples_ts\": [ 45.4651, 45.7913, 45.5734 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:06:51Z", - "avg_ns": 2126626660, - "stddev_ns": 5119544, - "avg_ts": 240.757811, - "stddev_ts": 0.579875, - "samples_ns": [ - 2121165851, - 2131317238, - 2127396892 - ], - "samples_ts": [ - 241.377, - 240.227, - 240.67 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:07:00Z", - "avg_ns": 2806431473, - "stddev_ns": 10212573, - "avg_ts": 45.609926, - "stddev_ts": 0.166159, - "samples_ns": [ - 2815347294, - 2795289840, - 2808657287 - ], - "samples_ts": [ - 45.4651, - 45.7913, - 45.5734 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 182 - }, - { - "timestamp_utc": "2025-12-08T22:07:49.560044+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:07:09Z\",\n \"avg_ns\": 2119209452,\n \"stddev_ns\": 9415914,\n \"avg_ts\": 241.602698,\n \"stddev_ts\": 1.070715,\n \"samples_ns\": [ 2113922722, 2113625109, 2130080526 ],\n \"samples_ts\": [ 242.204, 242.238, 240.366 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:07:17Z\",\n \"avg_ns\": 10553540679,\n \"stddev_ns\": 60341581,\n \"avg_ts\": 48.515584,\n \"stddev_ts\": 0.278111,\n \"samples_ns\": [ 10601049205, 10573926097, 10485646735 ],\n \"samples_ts\": [ 48.2971, 48.421, 48.8287 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:07:09Z", - "avg_ns": 2119209452, - "stddev_ns": 9415914, - "avg_ts": 241.602698, - "stddev_ts": 1.070715, - "samples_ns": [ - 2113922722, - 2113625109, - 2130080526 - ], - "samples_ts": [ - 242.204, - 242.238, - 240.366 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:07:17Z", - "avg_ns": 10553540679, - "stddev_ns": 60341581, - "avg_ts": 48.515584, - "stddev_ts": 0.278111, - "samples_ns": [ - 10601049205, - 10573926097, - 10485646735 - ], - "samples_ts": [ - 48.2971, - 48.421, - 48.8287 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 183 - }, - { - "timestamp_utc": "2025-12-08T22:08:00.646594+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:07:50Z\",\n \"avg_ns\": 520755462,\n \"stddev_ns\": 2033150,\n \"avg_ts\": 245.799241,\n \"stddev_ts\": 0.957380,\n \"samples_ns\": [ 519603132, 519560538, 523102718 ],\n \"samples_ts\": [ 246.342, 246.362, 244.694 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:07:52Z\",\n \"avg_ns\": 2769186867,\n \"stddev_ns\": 350278781,\n \"avg_ts\": 46.687184,\n \"stddev_ts\": 5.505177,\n \"samples_ns\": [ 3173502981, 2576598810, 2557458810 ],\n \"samples_ts\": [ 40.334, 49.6779, 50.0497 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:07:50Z", - "avg_ns": 520755462, - "stddev_ns": 2033150, - "avg_ts": 245.799241, - "stddev_ts": 0.95738, - "samples_ns": [ - 519603132, - 519560538, - 523102718 - ], - "samples_ts": [ - 246.342, - 246.362, - 244.694 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:07:52Z", - "avg_ns": 2769186867, - "stddev_ns": 350278781, - "avg_ts": 46.687184, - "stddev_ts": 5.505177, - "samples_ns": [ - 3173502981, - 2576598810, - 2557458810 - ], - "samples_ts": [ - 40.334, - 49.6779, - 50.0497 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 184 - }, - { - "timestamp_utc": "2025-12-08T22:08:35.142428+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:08:01Z\",\n \"avg_ns\": 522432009,\n \"stddev_ns\": 1069641,\n \"avg_ts\": 245.008642,\n \"stddev_ts\": 0.501188,\n \"samples_ns\": [ 523602886, 521506872, 522186270 ],\n \"samples_ts\": [ 244.46, 245.443, 245.123 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:08:03Z\",\n \"avg_ns\": 10561823622,\n \"stddev_ns\": 22499990,\n \"avg_ts\": 48.476624,\n \"stddev_ts\": 0.103183,\n \"samples_ns\": [ 10555307340, 10586862560, 10543300966 ],\n \"samples_ts\": [ 48.5064, 48.3618, 48.5616 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:08:01Z", - "avg_ns": 522432009, - "stddev_ns": 1069641, - "avg_ts": 245.008642, - "stddev_ts": 0.501188, - "samples_ns": [ - 523602886, - 521506872, - 522186270 - ], - "samples_ts": [ - 244.46, - 245.443, - 245.123 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:08:03Z", - "avg_ns": 10561823622, - "stddev_ns": 22499990, - "avg_ts": 48.476624, - "stddev_ts": 0.103183, - "samples_ns": [ - 10555307340, - 10586862560, - 10543300966 - ], - "samples_ts": [ - 48.5064, - 48.3618, - 48.5616 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 185 - }, - { - "timestamp_utc": "2025-12-08T22:08:52.747265+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:08:35Z\",\n \"avg_ns\": 2112212402,\n \"stddev_ns\": 1093254,\n \"avg_ts\": 242.399908,\n \"stddev_ts\": 0.125472,\n \"samples_ns\": [ 2113254005, 2112309259, 2111073942 ],\n \"samples_ts\": [ 242.28, 242.389, 242.531 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:08:44Z\",\n \"avg_ns\": 2811355500,\n \"stddev_ns\": 357597755,\n \"avg_ts\": 45.991795,\n \"stddev_ts\": 5.450080,\n \"samples_ns\": [ 2608849642, 2600968111, 3224248747 ],\n \"samples_ts\": [ 49.0638, 49.2124, 39.6992 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:08:35Z", - "avg_ns": 2112212402, - "stddev_ns": 1093254, - "avg_ts": 242.399908, - "stddev_ts": 0.125472, - "samples_ns": [ - 2113254005, - 2112309259, - 2111073942 - ], - "samples_ts": [ - 242.28, - 242.389, - 242.531 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:08:44Z", - "avg_ns": 2811355500, - "stddev_ns": 357597755, - "avg_ts": 45.991795, - "stddev_ts": 5.45008, - "samples_ns": [ - 2608849642, - 2600968111, - 3224248747 - ], - "samples_ts": [ - 49.0638, - 49.2124, - 39.6992 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 186 - }, - { - "timestamp_utc": "2025-12-08T22:09:37.127322+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:08:53Z\",\n \"avg_ns\": 2294705926,\n \"stddev_ns\": 314253900,\n \"avg_ts\": 225.738026,\n \"stddev_ts\": 28.650796,\n \"samples_ns\": [ 2117081251, 2109487946, 2657548583 ],\n \"samples_ts\": [ 241.842, 242.713, 192.659 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:09:02Z\",\n \"avg_ns\": 11552143893,\n \"stddev_ns\": 512986570,\n \"avg_ts\": 44.378850,\n \"stddev_ts\": 1.962252,\n \"samples_ns\": [ 11060736776, 11511413352, 12084281552 ],\n \"samples_ts\": [ 46.2899, 44.4776, 42.3691 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:08:53Z", - "avg_ns": 2294705926, - "stddev_ns": 314253900, - "avg_ts": 225.738026, - "stddev_ts": 28.650796, - "samples_ns": [ - 2117081251, - 2109487946, - 2657548583 - ], - "samples_ts": [ - 241.842, - 242.713, - 192.659 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:09:02Z", - "avg_ns": 11552143893, - "stddev_ns": 512986570, - "avg_ts": 44.37885, - "stddev_ts": 1.962252, - "samples_ns": [ - 11060736776, - 11511413352, - 12084281552 - ], - "samples_ts": [ - 46.2899, - 44.4776, - 42.3691 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 187 - }, - { - "timestamp_utc": "2025-12-08T22:09:47.663632+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:09:37Z\",\n \"avg_ns\": 520523274,\n \"stddev_ns\": 270532,\n \"avg_ts\": 245.906435,\n \"stddev_ts\": 0.126929,\n \"samples_ns\": [ 520648661, 520706255, 520214908 ],\n \"samples_ts\": [ 245.847, 245.82, 246.052 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:09:39Z\",\n \"avg_ns\": 2584524374,\n \"stddev_ns\": 24484347,\n \"avg_ts\": 49.528512,\n \"stddev_ts\": 0.468491,\n \"samples_ns\": [ 2561520974, 2610260397, 2581791751 ],\n \"samples_ts\": [ 49.9703, 49.0373, 49.578 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:09:37Z", - "avg_ns": 520523274, - "stddev_ns": 270532, - "avg_ts": 245.906435, - "stddev_ts": 0.126929, - "samples_ns": [ - 520648661, - 520706255, - 520214908 - ], - "samples_ts": [ - 245.847, - 245.82, - 246.052 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:09:39Z", - "avg_ns": 2584524374, - "stddev_ns": 24484347, - "avg_ts": 49.528512, - "stddev_ts": 0.468491, - "samples_ns": [ - 2561520974, - 2610260397, - 2581791751 - ], - "samples_ts": [ - 49.9703, - 49.0373, - 49.578 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 188 - }, - { - "timestamp_utc": "2025-12-08T22:10:22.233131+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:09:48Z\",\n \"avg_ns\": 520506024,\n \"stddev_ns\": 476499,\n \"avg_ts\": 245.914677,\n \"stddev_ts\": 0.224595,\n \"samples_ns\": [ 520040080, 520990359, 520487635 ],\n \"samples_ts\": [ 246.135, 245.686, 245.923 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:09:50Z\",\n \"avg_ns\": 10599778857,\n \"stddev_ns\": 44390697,\n \"avg_ts\": 48.303458,\n \"stddev_ts\": 0.201803,\n \"samples_ns\": [ 10573120235, 10651022841, 10575193495 ],\n \"samples_ts\": [ 48.4247, 48.0705, 48.4152 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:09:48Z", - "avg_ns": 520506024, - "stddev_ns": 476499, - "avg_ts": 245.914677, - "stddev_ts": 0.224595, - "samples_ns": [ - 520040080, - 520990359, - 520487635 - ], - "samples_ts": [ - 246.135, - 245.686, - 245.923 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:09:50Z", - "avg_ns": 10599778857, - "stddev_ns": 44390697, - "avg_ts": 48.303458, - "stddev_ts": 0.201803, - "samples_ns": [ - 10573120235, - 10651022841, - 10575193495 - ], - "samples_ts": [ - 48.4247, - 48.0705, - 48.4152 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 189 - }, - { - "timestamp_utc": "2025-12-08T22:10:39.530476+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:10:22Z\",\n \"avg_ns\": 2206626171,\n \"stddev_ns\": 2116161,\n \"avg_ts\": 232.028569,\n \"stddev_ts\": 0.222522,\n \"samples_ns\": [ 2204200717, 2208087099, 2207590699 ],\n \"samples_ts\": [ 232.284, 231.875, 231.927 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:10:31Z\",\n \"avg_ns\": 2586505541,\n \"stddev_ns\": 17264622,\n \"avg_ts\": 49.489086,\n \"stddev_ts\": 0.329672,\n \"samples_ns\": [ 2605300456, 2582863775, 2571352393 ],\n \"samples_ts\": [ 49.1306, 49.5574, 49.7793 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:10:22Z", - "avg_ns": 2206626171, - "stddev_ns": 2116161, - "avg_ts": 232.028569, - "stddev_ts": 0.222522, - "samples_ns": [ - 2204200717, - 2208087099, - 2207590699 - ], - "samples_ts": [ - 232.284, - 231.875, - 231.927 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:10:31Z", - "avg_ns": 2586505541, - "stddev_ns": 17264622, - "avg_ts": 49.489086, - "stddev_ts": 0.329672, - "samples_ns": [ - 2605300456, - 2582863775, - 2571352393 - ], - "samples_ts": [ - 49.1306, - 49.5574, - 49.7793 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 190 - }, - { - "timestamp_utc": "2025-12-08T22:11:22.201262+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:10:40Z\",\n \"avg_ns\": 2250490048,\n \"stddev_ns\": 80265943,\n \"avg_ts\": 227.695204,\n \"stddev_ts\": 7.957137,\n \"samples_ns\": [ 2204528513, 2343172108, 2203769525 ],\n \"samples_ts\": [ 232.249, 218.507, 232.329 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:10:49Z\",\n \"avg_ns\": 11002191084,\n \"stddev_ns\": 321977687,\n \"avg_ts\": 46.563222,\n \"stddev_ts\": 1.385787,\n \"samples_ns\": [ 11170375062, 10630948924, 11205249268 ],\n \"samples_ts\": [ 45.8355, 48.1613, 45.6929 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:10:40Z", - "avg_ns": 2250490048, - "stddev_ns": 80265943, - "avg_ts": 227.695204, - "stddev_ts": 7.957137, - "samples_ns": [ - 2204528513, - 2343172108, - 2203769525 - ], - "samples_ts": [ - 232.249, - 218.507, - 232.329 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:10:49Z", - "avg_ns": 11002191084, - "stddev_ns": 321977687, - "avg_ts": 46.563222, - "stddev_ts": 1.385787, - "samples_ns": [ - 11170375062, - 10630948924, - 11205249268 - ], - "samples_ts": [ - 45.8355, - 48.1613, - 45.6929 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 191 - }, - { - "timestamp_utc": "2025-12-08T22:11:32.724536+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:11:22Z\",\n \"avg_ns\": 521811417,\n \"stddev_ns\": 1830795,\n \"avg_ts\": 245.301360,\n \"stddev_ts\": 0.860728,\n \"samples_ns\": [ 519953129, 523613124, 521867999 ],\n \"samples_ts\": [ 246.176, 244.455, 245.273 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:11:24Z\",\n \"avg_ns\": 2572979736,\n \"stddev_ns\": 2758317,\n \"avg_ts\": 49.747806,\n \"stddev_ts\": 0.053280,\n \"samples_ns\": [ 2571385105, 2576163689, 2571390416 ],\n \"samples_ts\": [ 49.7786, 49.6863, 49.7785 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:11:22Z", - "avg_ns": 521811417, - "stddev_ns": 1830795, - "avg_ts": 245.30136, - "stddev_ts": 0.860728, - "samples_ns": [ - 519953129, - 523613124, - 521867999 - ], - "samples_ts": [ - 246.176, - 244.455, - 245.273 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:11:24Z", - "avg_ns": 2572979736, - "stddev_ns": 2758317, - "avg_ts": 49.747806, - "stddev_ts": 0.05328, - "samples_ns": [ - 2571385105, - 2576163689, - 2571390416 - ], - "samples_ts": [ - 49.7786, - 49.6863, - 49.7785 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 192 - }, - { - "timestamp_utc": "2025-12-08T22:12:10.487648+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:11:33Z\",\n \"avg_ns\": 527644593,\n \"stddev_ns\": 7696940,\n \"avg_ts\": 242.621683,\n \"stddev_ts\": 3.511686,\n \"samples_ns\": [ 524181688, 522287385, 536464706 ],\n \"samples_ts\": [ 244.19, 245.076, 238.599 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:11:35Z\",\n \"avg_ns\": 11651127238,\n \"stddev_ns\": 319030946,\n \"avg_ts\": 43.966025,\n \"stddev_ts\": 1.193195,\n \"samples_ns\": [ 11575364599, 11376797492, 12001219624 ],\n \"samples_ts\": [ 44.2319, 45.0039, 42.6623 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:11:33Z", - "avg_ns": 527644593, - "stddev_ns": 7696940, - "avg_ts": 242.621683, - "stddev_ts": 3.511686, - "samples_ns": [ - 524181688, - 522287385, - 536464706 - ], - "samples_ts": [ - 244.19, - 245.076, - 238.599 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:11:35Z", - "avg_ns": 11651127238, - "stddev_ns": 319030946, - "avg_ts": 43.966025, - "stddev_ts": 1.193195, - "samples_ns": [ - 11575364599, - 11376797492, - 12001219624 - ], - "samples_ts": [ - 44.2319, - 45.0039, - 42.6623 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 193 - }, - { - "timestamp_utc": "2025-12-08T22:12:27.848761+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:12:11Z\",\n \"avg_ns\": 2269775903,\n \"stddev_ns\": 267065290,\n \"avg_ts\": 227.539423,\n \"stddev_ts\": 25.069716,\n \"samples_ns\": [ 2578154454, 2116520144, 2114653111 ],\n \"samples_ts\": [ 198.592, 241.907, 242.12 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:12:20Z\",\n \"avg_ns\": 2577707933,\n \"stddev_ns\": 5455495,\n \"avg_ts\": 49.656666,\n \"stddev_ts\": 0.105090,\n \"samples_ns\": [ 2577712004, 2583161156, 2572250640 ],\n \"samples_ts\": [ 49.6564, 49.5517, 49.7619 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:12:11Z", - "avg_ns": 2269775903, - "stddev_ns": 267065290, - "avg_ts": 227.539423, - "stddev_ts": 25.069716, - "samples_ns": [ - 2578154454, - 2116520144, - 2114653111 - ], - "samples_ts": [ - 198.592, - 241.907, - 242.12 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:12:20Z", - "avg_ns": 2577707933, - "stddev_ns": 5455495, - "avg_ts": 49.656666, - "stddev_ts": 0.10509, - "samples_ns": [ - 2577712004, - 2583161156, - 2572250640 - ], - "samples_ts": [ - 49.6564, - 49.5517, - 49.7619 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 194 - }, - { - "timestamp_utc": "2025-12-08T22:13:09.837040+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:12:28Z\",\n \"avg_ns\": 2118319257,\n \"stddev_ns\": 5949717,\n \"avg_ts\": 241.702325,\n \"stddev_ts\": 0.678372,\n \"samples_ns\": [ 2112921349, 2117338288, 2124698136 ],\n \"samples_ts\": [ 242.319, 241.813, 240.975 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:12:36Z\",\n \"avg_ns\": 10939775520,\n \"stddev_ns\": 326718523,\n \"avg_ts\": 46.830025,\n \"stddev_ts\": 1.423077,\n \"samples_ns\": [ 11135349729, 11121377087, 10562599745 ],\n \"samples_ts\": [ 45.9797, 46.0375, 48.4729 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:12:28Z", - "avg_ns": 2118319257, - "stddev_ns": 5949717, - "avg_ts": 241.702325, - "stddev_ts": 0.678372, - "samples_ns": [ - 2112921349, - 2117338288, - 2124698136 - ], - "samples_ts": [ - 242.319, - 241.813, - 240.975 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:12:36Z", - "avg_ns": 10939775520, - "stddev_ns": 326718523, - "avg_ts": 46.830025, - "stddev_ts": 1.423077, - "samples_ns": [ - 11135349729, - 11121377087, - 10562599745 - ], - "samples_ts": [ - 45.9797, - 46.0375, - 48.4729 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 195 - }, - { - "timestamp_utc": "2025-12-08T22:13:20.887482+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:13:10Z\",\n \"avg_ns\": 687689746,\n \"stddev_ns\": 203179161,\n \"avg_ts\": 196.441762,\n \"stddev_ts\": 52.814274,\n \"samples_ns\": [ 523500576, 624652635, 914916027 ],\n \"samples_ts\": [ 244.508, 204.914, 139.904 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:13:13Z\",\n \"avg_ns\": 2587776891,\n \"stddev_ns\": 1148173,\n \"avg_ts\": 49.463312,\n \"stddev_ts\": 0.021902,\n \"samples_ns\": [ 2587676326, 2586684571, 2588969778 ],\n \"samples_ts\": [ 49.4652, 49.4842, 49.4405 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:13:10Z", - "avg_ns": 687689746, - "stddev_ns": 203179161, - "avg_ts": 196.441762, - "stddev_ts": 52.814274, - "samples_ns": [ - 523500576, - 624652635, - 914916027 - ], - "samples_ts": [ - 244.508, - 204.914, - 139.904 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:13:13Z", - "avg_ns": 2587776891, - "stddev_ns": 1148173, - "avg_ts": 49.463312, - "stddev_ts": 0.021902, - "samples_ns": [ - 2587676326, - 2586684571, - 2588969778 - ], - "samples_ts": [ - 49.4652, - 49.4842, - 49.4405 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 196 - }, - { - "timestamp_utc": "2025-12-08T22:13:56.991958+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:13:21Z\",\n \"avg_ns\": 524719451,\n \"stddev_ns\": 593836,\n \"avg_ts\": 243.940088,\n \"stddev_ts\": 0.275896,\n \"samples_ns\": [ 524336947, 525403562, 524417844 ],\n \"samples_ts\": [ 244.118, 243.622, 244.08 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:13:23Z\",\n \"avg_ns\": 11099411288,\n \"stddev_ns\": 351009241,\n \"avg_ts\": 46.159918,\n \"stddev_ts\": 1.486778,\n \"samples_ns\": [ 11289969051, 11313927954, 10694336860 ],\n \"samples_ts\": [ 45.35, 45.254, 47.8758 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:13:21Z", - "avg_ns": 524719451, - "stddev_ns": 593836, - "avg_ts": 243.940088, - "stddev_ts": 0.275896, - "samples_ns": [ - 524336947, - 525403562, - 524417844 - ], - "samples_ts": [ - 244.118, - 243.622, - 244.08 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:13:23Z", - "avg_ns": 11099411288, - "stddev_ns": 351009241, - "avg_ts": 46.159918, - "stddev_ts": 1.486778, - "samples_ns": [ - 11289969051, - 11313927954, - 10694336860 - ], - "samples_ts": [ - 45.35, - 45.254, - 47.8758 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 197 - }, - { - "timestamp_utc": "2025-12-08T22:14:14.442135+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:13:57Z\",\n \"avg_ns\": 2122864005,\n \"stddev_ns\": 2476525,\n \"avg_ts\": 241.183827,\n \"stddev_ts\": 0.281321,\n \"samples_ns\": [ 2125326805, 2122890389, 2120374822 ],\n \"samples_ts\": [ 240.904, 241.181, 241.467 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:14:06Z\",\n \"avg_ns\": 2597972795,\n \"stddev_ns\": 4052348,\n \"avg_ts\": 49.269264,\n \"stddev_ts\": 0.076802,\n \"samples_ns\": [ 2596772928, 2594655872, 2602489585 ],\n \"samples_ts\": [ 49.2919, 49.3322, 49.1837 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:13:57Z", - "avg_ns": 2122864005, - "stddev_ns": 2476525, - "avg_ts": 241.183827, - "stddev_ts": 0.281321, - "samples_ns": [ - 2125326805, - 2122890389, - 2120374822 - ], - "samples_ts": [ - 240.904, - 241.181, - 241.467 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:14:06Z", - "avg_ns": 2597972795, - "stddev_ns": 4052348, - "avg_ts": 49.269264, - "stddev_ts": 0.076802, - "samples_ns": [ - 2596772928, - 2594655872, - 2602489585 - ], - "samples_ts": [ - 49.2919, - 49.3322, - 49.1837 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 198 - }, - { - "timestamp_utc": "2025-12-08T22:14:56.203588+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:14:15Z\",\n \"avg_ns\": 2118129834,\n \"stddev_ns\": 6941662,\n \"avg_ts\": 241.724400,\n \"stddev_ts\": 0.791723,\n \"samples_ns\": [ 2111657979, 2117270277, 2125461246 ],\n \"samples_ts\": [ 242.464, 241.821, 240.889 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:14:23Z\",\n \"avg_ns\": 10862800449,\n \"stddev_ns\": 387977400,\n \"avg_ts\": 47.172640,\n \"stddev_ts\": 1.650785,\n \"samples_ns\": [ 10638898851, 11310798116, 10638704382 ],\n \"samples_ts\": [ 48.1253, 45.2665, 48.1262 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:14:15Z", - "avg_ns": 2118129834, - "stddev_ns": 6941662, - "avg_ts": 241.7244, - "stddev_ts": 0.791723, - "samples_ns": [ - 2111657979, - 2117270277, - 2125461246 - ], - "samples_ts": [ - 242.464, - 241.821, - 240.889 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:14:23Z", - "avg_ns": 10862800449, - "stddev_ns": 387977400, - "avg_ts": 47.17264, - "stddev_ts": 1.650785, - "samples_ns": [ - 10638898851, - 11310798116, - 10638704382 - ], - "samples_ts": [ - 48.1253, - 45.2665, - 48.1262 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 199 - }, - { - "timestamp_utc": "2025-12-08T22:15:06.798537+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:14:56Z\",\n \"avg_ns\": 519585666,\n \"stddev_ns\": 2247834,\n \"avg_ts\": 246.353217,\n \"stddev_ts\": 1.068105,\n \"samples_ns\": [ 517015396, 520559353, 521182251 ],\n \"samples_ts\": [ 247.575, 245.889, 245.595 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:14:58Z\",\n \"avg_ns\": 2601167089,\n \"stddev_ns\": 6329539,\n \"avg_ts\": 49.208874,\n \"stddev_ts\": 0.119641,\n \"samples_ns\": [ 2595678302, 2599732635, 2608090332 ],\n \"samples_ts\": [ 49.3127, 49.2358, 49.0781 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:14:56Z", - "avg_ns": 519585666, - "stddev_ns": 2247834, - "avg_ts": 246.353217, - "stddev_ts": 1.068105, - "samples_ns": [ - 517015396, - 520559353, - 521182251 - ], - "samples_ts": [ - 247.575, - 245.889, - 245.595 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:14:58Z", - "avg_ns": 2601167089, - "stddev_ns": 6329539, - "avg_ts": 49.208874, - "stddev_ts": 0.119641, - "samples_ns": [ - 2595678302, - 2599732635, - 2608090332 - ], - "samples_ts": [ - 49.3127, - 49.2358, - 49.0781 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 200 - }, - { - "timestamp_utc": "2025-12-08T22:15:41.428631+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:15:07Z\",\n \"avg_ns\": 521948728,\n \"stddev_ns\": 1879738,\n \"avg_ts\": 245.236931,\n \"stddev_ts\": 0.881332,\n \"samples_ns\": [ 524114303, 520990887, 520740995 ],\n \"samples_ts\": [ 244.222, 245.686, 245.804 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:15:09Z\",\n \"avg_ns\": 10611593481,\n \"stddev_ns\": 7334058,\n \"avg_ts\": 48.249131,\n \"stddev_ts\": 0.033338,\n \"samples_ns\": [ 10610332225, 10619475642, 10604972577 ],\n \"samples_ts\": [ 48.2549, 48.2133, 48.2792 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:15:07Z", - "avg_ns": 521948728, - "stddev_ns": 1879738, - "avg_ts": 245.236931, - "stddev_ts": 0.881332, - "samples_ns": [ - 524114303, - 520990887, - 520740995 - ], - "samples_ts": [ - 244.222, - 245.686, - 245.804 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:15:09Z", - "avg_ns": 10611593481, - "stddev_ns": 7334058, - "avg_ts": 48.249131, - "stddev_ts": 0.033338, - "samples_ns": [ - 10610332225, - 10619475642, - 10604972577 - ], - "samples_ts": [ - 48.2549, - 48.2133, - 48.2792 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 201 - }, - { - "timestamp_utc": "2025-12-08T22:15:58.779461+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:15:42Z\",\n \"avg_ns\": 2211679055,\n \"stddev_ns\": 6361717,\n \"avg_ts\": 231.499601,\n \"stddev_ts\": 0.664818,\n \"samples_ns\": [ 2207383462, 2218987292, 2208666412 ],\n \"samples_ts\": [ 231.949, 230.736, 231.814 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:15:50Z\",\n \"avg_ns\": 2594414779,\n \"stddev_ns\": 12467587,\n \"avg_ts\": 49.337513,\n \"stddev_ts\": 0.237515,\n \"samples_ns\": [ 2604910056, 2580633231, 2597701050 ],\n \"samples_ts\": [ 49.138, 49.6002, 49.2743 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:15:42Z", - "avg_ns": 2211679055, - "stddev_ns": 6361717, - "avg_ts": 231.499601, - "stddev_ts": 0.664818, - "samples_ns": [ - 2207383462, - 2218987292, - 2208666412 - ], - "samples_ts": [ - 231.949, - 230.736, - 231.814 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:15:50Z", - "avg_ns": 2594414779, - "stddev_ns": 12467587, - "avg_ts": 49.337513, - "stddev_ts": 0.237515, - "samples_ns": [ - 2604910056, - 2580633231, - 2597701050 - ], - "samples_ts": [ - 49.138, - 49.6002, - 49.2743 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 202 - }, - { - "timestamp_utc": "2025-12-08T22:16:40.050929+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:15:59Z\",\n \"avg_ns\": 2211679210,\n \"stddev_ns\": 2101388,\n \"avg_ts\": 231.498449,\n \"stddev_ts\": 0.219961,\n \"samples_ns\": [ 2213070730, 2209263228, 2212703674 ],\n \"samples_ts\": [ 231.353, 231.751, 231.391 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:16:08Z\",\n \"avg_ns\": 10573793050,\n \"stddev_ns\": 22481223,\n \"avg_ts\": 48.421748,\n \"stddev_ts\": 0.102837,\n \"samples_ns\": [ 10599409998, 10557349573, 10564619581 ],\n \"samples_ts\": [ 48.3046, 48.497, 48.4636 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:15:59Z", - "avg_ns": 2211679210, - "stddev_ns": 2101388, - "avg_ts": 231.498449, - "stddev_ts": 0.219961, - "samples_ns": [ - 2213070730, - 2209263228, - 2212703674 - ], - "samples_ts": [ - 231.353, - 231.751, - 231.391 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:16:08Z", - "avg_ns": 10573793050, - "stddev_ns": 22481223, - "avg_ts": 48.421748, - "stddev_ts": 0.102837, - "samples_ns": [ - 10599409998, - 10557349573, - 10564619581 - ], - "samples_ts": [ - 48.3046, - 48.497, - 48.4636 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 203 - }, - { - "timestamp_utc": "2025-12-08T22:16:50.632493+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:16:40Z\",\n \"avg_ns\": 519590120,\n \"stddev_ns\": 487956,\n \"avg_ts\": 246.348170,\n \"stddev_ts\": 0.231346,\n \"samples_ns\": [ 519105587, 519583343, 520081430 ],\n \"samples_ts\": [ 246.578, 246.351, 246.115 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:16:42Z\",\n \"avg_ns\": 2605729000,\n \"stddev_ns\": 7665492,\n \"avg_ts\": 49.122813,\n \"stddev_ts\": 0.144737,\n \"samples_ns\": [ 2609361272, 2610902823, 2596922907 ],\n \"samples_ts\": [ 49.0542, 49.0252, 49.2891 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:16:40Z", - "avg_ns": 519590120, - "stddev_ns": 487956, - "avg_ts": 246.34817, - "stddev_ts": 0.231346, - "samples_ns": [ - 519105587, - 519583343, - 520081430 - ], - "samples_ts": [ - 246.578, - 246.351, - 246.115 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:16:42Z", - "avg_ns": 2605729000, - "stddev_ns": 7665492, - "avg_ts": 49.122813, - "stddev_ts": 0.144737, - "samples_ns": [ - 2609361272, - 2610902823, - 2596922907 - ], - "samples_ts": [ - 49.0542, - 49.0252, - 49.2891 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 204 - }, - { - "timestamp_utc": "2025-12-08T22:17:26.193025+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:16:51Z\",\n \"avg_ns\": 521040784,\n \"stddev_ns\": 1976165,\n \"avg_ts\": 245.664503,\n \"stddev_ts\": 0.930378,\n \"samples_ns\": [ 523216741, 519358587, 520547026 ],\n \"samples_ts\": [ 244.64, 246.458, 245.895 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:16:53Z\",\n \"avg_ns\": 10922595012,\n \"stddev_ns\": 377927771,\n \"avg_ts\": 46.912018,\n \"stddev_ts\": 1.591469,\n \"samples_ns\": [ 10695809711, 10713101118, 11358874207 ],\n \"samples_ts\": [ 47.8692, 47.792, 45.0749 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:16:51Z", - "avg_ns": 521040784, - "stddev_ns": 1976165, - "avg_ts": 245.664503, - "stddev_ts": 0.930378, - "samples_ns": [ - 523216741, - 519358587, - 520547026 - ], - "samples_ts": [ - 244.64, - 246.458, - 245.895 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:16:53Z", - "avg_ns": 10922595012, - "stddev_ns": 377927771, - "avg_ts": 46.912018, - "stddev_ts": 1.591469, - "samples_ns": [ - 10695809711, - 10713101118, - 11358874207 - ], - "samples_ts": [ - 47.8692, - 47.792, - 45.0749 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 205 - }, - { - "timestamp_utc": "2025-12-08T22:17:43.219282+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:17:26Z\",\n \"avg_ns\": 2122300457,\n \"stddev_ns\": 6635096,\n \"avg_ts\": 241.249222,\n \"stddev_ts\": 0.753607,\n \"samples_ns\": [ 2129462608, 2116363071, 2121075692 ],\n \"samples_ts\": [ 240.436, 241.924, 241.387 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:17:35Z\",\n \"avg_ns\": 2606926486,\n \"stddev_ns\": 25327438,\n \"avg_ts\": 49.103066,\n \"stddev_ts\": 0.478707,\n \"samples_ns\": [ 2613268309, 2628480257, 2579030894 ],\n \"samples_ts\": [ 48.9808, 48.6973, 49.631 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:17:26Z", - "avg_ns": 2122300457, - "stddev_ns": 6635096, - "avg_ts": 241.249222, - "stddev_ts": 0.753607, - "samples_ns": [ - 2129462608, - 2116363071, - 2121075692 - ], - "samples_ts": [ - 240.436, - 241.924, - 241.387 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:17:35Z", - "avg_ns": 2606926486, - "stddev_ns": 25327438, - "avg_ts": 49.103066, - "stddev_ts": 0.478707, - "samples_ns": [ - 2613268309, - 2628480257, - 2579030894 - ], - "samples_ts": [ - 48.9808, - 48.6973, - 49.631 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 206 - }, - { - "timestamp_utc": "2025-12-08T22:18:26.332478+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:17:43Z\",\n \"avg_ns\": 2118175516,\n \"stddev_ns\": 3691010,\n \"avg_ts\": 241.717947,\n \"stddev_ts\": 0.421367,\n \"samples_ns\": [ 2114150180, 2118977061, 2121399309 ],\n \"samples_ts\": [ 242.178, 241.626, 241.35 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:17:52Z\",\n \"avg_ns\": 11313086075,\n \"stddev_ns\": 244178282,\n \"avg_ts\": 45.271211,\n \"stddev_ts\": 0.965100,\n \"samples_ns\": [ 11595024274, 11174599221, 11169634731 ],\n \"samples_ts\": [ 44.1569, 45.8182, 45.8386 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:17:43Z", - "avg_ns": 2118175516, - "stddev_ns": 3691010, - "avg_ts": 241.717947, - "stddev_ts": 0.421367, - "samples_ns": [ - 2114150180, - 2118977061, - 2121399309 - ], - "samples_ts": [ - 242.178, - 241.626, - 241.35 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:17:52Z", - "avg_ns": 11313086075, - "stddev_ns": 244178282, - "avg_ts": 45.271211, - "stddev_ts": 0.9651, - "samples_ns": [ - 11595024274, - 11174599221, - 11169634731 - ], - "samples_ts": [ - 44.1569, - 45.8182, - 45.8386 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 207 - }, - { - "timestamp_utc": "2025-12-08T22:18:37.582089+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:18:26Z\",\n \"avg_ns\": 520432646,\n \"stddev_ns\": 1028523,\n \"avg_ts\": 245.949854,\n \"stddev_ts\": 0.486374,\n \"samples_ns\": [ 520966263, 519247547, 521084130 ],\n \"samples_ts\": [ 245.697, 246.511, 245.642 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:18:29Z\",\n \"avg_ns\": 2819245159,\n \"stddev_ns\": 348063139,\n \"avg_ts\": 45.837106,\n \"stddev_ts\": 5.283778,\n \"samples_ns\": [ 2609479430, 2627232919, 3221023128 ],\n \"samples_ts\": [ 49.0519, 48.7205, 39.7389 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:18:26Z", - "avg_ns": 520432646, - "stddev_ns": 1028523, - "avg_ts": 245.949854, - "stddev_ts": 0.486374, - "samples_ns": [ - 520966263, - 519247547, - 521084130 - ], - "samples_ts": [ - 245.697, - 246.511, - 245.642 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:18:29Z", - "avg_ns": 2819245159, - "stddev_ns": 348063139, - "avg_ts": 45.837106, - "stddev_ts": 5.283778, - "samples_ns": [ - 2609479430, - 2627232919, - 3221023128 - ], - "samples_ts": [ - 49.0519, - 48.7205, - 39.7389 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 208 - }, - { - "timestamp_utc": "2025-12-08T22:19:12.899427+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:18:38Z\",\n \"avg_ns\": 521173366,\n \"stddev_ns\": 1097741,\n \"avg_ts\": 245.600384,\n \"stddev_ts\": 0.517491,\n \"samples_ns\": [ 522203057, 520018334, 521298707 ],\n \"samples_ts\": [ 245.115, 246.145, 245.541 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:18:40Z\",\n \"avg_ns\": 10838908470,\n \"stddev_ns\": 343902387,\n \"avg_ts\": 47.268440,\n \"stddev_ts\": 1.475605,\n \"samples_ns\": [ 10694411638, 11231483814, 10590829959 ],\n \"samples_ts\": [ 47.8755, 45.5861, 48.3437 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:18:38Z", - "avg_ns": 521173366, - "stddev_ns": 1097741, - "avg_ts": 245.600384, - "stddev_ts": 0.517491, - "samples_ns": [ - 522203057, - 520018334, - 521298707 - ], - "samples_ts": [ - 245.115, - 246.145, - 245.541 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:18:40Z", - "avg_ns": 10838908470, - "stddev_ns": 343902387, - "avg_ts": 47.26844, - "stddev_ts": 1.475605, - "samples_ns": [ - 10694411638, - 11231483814, - 10590829959 - ], - "samples_ts": [ - 47.8755, - 45.5861, - 48.3437 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 209 - }, - { - "timestamp_utc": "2025-12-08T22:19:30.335289+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:19:13Z\",\n \"avg_ns\": 2276860130,\n \"stddev_ns\": 263800074,\n \"avg_ts\": 226.773357,\n \"stddev_ts\": 24.627891,\n \"samples_ns\": [ 2127399367, 2121728413, 2581452612 ],\n \"samples_ts\": [ 240.669, 241.313, 198.338 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:19:22Z\",\n \"avg_ns\": 2588765700,\n \"stddev_ns\": 4099829,\n \"avg_ts\": 49.444496,\n \"stddev_ts\": 0.078360,\n \"samples_ns\": [ 2590328620, 2591853863, 2584114618 ],\n \"samples_ts\": [ 49.4146, 49.3855, 49.5334 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:19:13Z", - "avg_ns": 2276860130, - "stddev_ns": 263800074, - "avg_ts": 226.773357, - "stddev_ts": 24.627891, - "samples_ns": [ - 2127399367, - 2121728413, - 2581452612 - ], - "samples_ts": [ - 240.669, - 241.313, - 198.338 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:19:22Z", - "avg_ns": 2588765700, - "stddev_ns": 4099829, - "avg_ts": 49.444496, - "stddev_ts": 0.07836, - "samples_ns": [ - 2590328620, - 2591853863, - 2584114618 - ], - "samples_ts": [ - 49.4146, - 49.3855, - 49.5334 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 210 - }, - { - "timestamp_utc": "2025-12-08T22:20:12.911529+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:19:30Z\",\n \"avg_ns\": 2254415146,\n \"stddev_ns\": 243509175,\n \"avg_ts\": 228.785638,\n \"stddev_ts\": 23.271666,\n \"samples_ns\": [ 2535402519, 2104905318, 2122937601 ],\n \"samples_ts\": [ 201.94, 243.241, 241.175 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:19:39Z\",\n \"avg_ns\": 10983064562,\n \"stddev_ns\": 370336481,\n \"avg_ts\": 46.653289,\n \"stddev_ts\": 1.604308,\n \"samples_ns\": [ 11201459577, 10555469801, 11192264309 ],\n \"samples_ts\": [ 45.7083, 48.5057, 45.7459 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:19:30Z", - "avg_ns": 2254415146, - "stddev_ns": 243509175, - "avg_ts": 228.785638, - "stddev_ts": 23.271666, - "samples_ns": [ - 2535402519, - 2104905318, - 2122937601 - ], - "samples_ts": [ - 201.94, - 243.241, - 241.175 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:19:39Z", - "avg_ns": 10983064562, - "stddev_ns": 370336481, - "avg_ts": 46.653289, - "stddev_ts": 1.604308, - "samples_ns": [ - 11201459577, - 10555469801, - 11192264309 - ], - "samples_ts": [ - 45.7083, - 48.5057, - 45.7459 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 211 - }, - { - "timestamp_utc": "2025-12-08T22:20:23.533660+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:20:13Z\",\n \"avg_ns\": 523238470,\n \"stddev_ns\": 1599980,\n \"avg_ts\": 244.631852,\n \"stddev_ts\": 0.746707,\n \"samples_ns\": [ 522564722, 522085903, 525064787 ],\n \"samples_ts\": [ 244.946, 245.17, 243.779 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:20:15Z\",\n \"avg_ns\": 2607326923,\n \"stddev_ns\": 7854488,\n \"avg_ts\": 49.092722,\n \"stddev_ts\": 0.148080,\n \"samples_ns\": [ 2598535279, 2609793178, 2613652312 ],\n \"samples_ts\": [ 49.2585, 49.046, 48.9736 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:20:13Z", - "avg_ns": 523238470, - "stddev_ns": 1599980, - "avg_ts": 244.631852, - "stddev_ts": 0.746707, - "samples_ns": [ - 522564722, - 522085903, - 525064787 - ], - "samples_ts": [ - 244.946, - 245.17, - 243.779 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:20:15Z", - "avg_ns": 2607326923, - "stddev_ns": 7854488, - "avg_ts": 49.092722, - "stddev_ts": 0.14808, - "samples_ns": [ - 2598535279, - 2609793178, - 2613652312 - ], - "samples_ts": [ - 49.2585, - 49.046, - 48.9736 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 212 - }, - { - "timestamp_utc": "2025-12-08T22:20:58.012928+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:20:24Z\",\n \"avg_ns\": 521918368,\n \"stddev_ns\": 945938,\n \"avg_ts\": 245.249617,\n \"stddev_ts\": 0.444491,\n \"samples_ns\": [ 520886156, 522126809, 522742141 ],\n \"samples_ts\": [ 245.735, 245.151, 244.863 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:20:26Z\",\n \"avg_ns\": 10558176416,\n \"stddev_ns\": 38854732,\n \"avg_ts\": 48.493660,\n \"stddev_ts\": 0.178415,\n \"samples_ns\": [ 10556352584, 10597910947, 10520265717 ],\n \"samples_ts\": [ 48.5016, 48.3114, 48.668 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:20:24Z", - "avg_ns": 521918368, - "stddev_ns": 945938, - "avg_ts": 245.249617, - "stddev_ts": 0.444491, - "samples_ns": [ - 520886156, - 522126809, - 522742141 - ], - "samples_ts": [ - 245.735, - 245.151, - 244.863 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:20:26Z", - "avg_ns": 10558176416, - "stddev_ns": 38854732, - "avg_ts": 48.49366, - "stddev_ts": 0.178415, - "samples_ns": [ - 10556352584, - 10597910947, - 10520265717 - ], - "samples_ts": [ - 48.5016, - 48.3114, - 48.668 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 213 - }, - { - "timestamp_utc": "2025-12-08T22:21:15.297095+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:20:58Z\",\n \"avg_ns\": 2213759326,\n \"stddev_ns\": 7364574,\n \"avg_ts\": 231.282496,\n \"stddev_ts\": 0.770434,\n \"samples_ns\": [ 2219795553, 2215928478, 2205553948 ],\n \"samples_ts\": [ 230.652, 231.054, 232.141 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:21:07Z\",\n \"avg_ns\": 2572220260,\n \"stddev_ns\": 15998016,\n \"avg_ts\": 49.763736,\n \"stddev_ts\": 0.308437,\n \"samples_ns\": [ 2590621809, 2564423486, 2561615486 ],\n \"samples_ts\": [ 49.409, 49.9138, 49.9685 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:20:58Z", - "avg_ns": 2213759326, - "stddev_ns": 7364574, - "avg_ts": 231.282496, - "stddev_ts": 0.770434, - "samples_ns": [ - 2219795553, - 2215928478, - 2205553948 - ], - "samples_ts": [ - 230.652, - 231.054, - 232.141 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:21:07Z", - "avg_ns": 2572220260, - "stddev_ns": 15998016, - "avg_ts": 49.763736, - "stddev_ts": 0.308437, - "samples_ns": [ - 2590621809, - 2564423486, - 2561615486 - ], - "samples_ts": [ - 49.409, - 49.9138, - 49.9685 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 214 - }, - { - "timestamp_utc": "2025-12-08T22:21:56.599048+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:21:15Z\",\n \"avg_ns\": 2208598319,\n \"stddev_ns\": 2841298,\n \"avg_ts\": 231.821495,\n \"stddev_ts\": 0.298194,\n \"samples_ns\": [ 2211431587, 2208613543, 2205749828 ],\n \"samples_ts\": [ 231.524, 231.82, 232.121 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:21:24Z\",\n \"avg_ns\": 10581687672,\n \"stddev_ns\": 35920536,\n \"avg_ts\": 48.385849,\n \"stddev_ts\": 0.164573,\n \"samples_ns\": [ 10540213585, 10601966460, 10602882971 ],\n \"samples_ts\": [ 48.5759, 48.2929, 48.2888 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:21:15Z", - "avg_ns": 2208598319, - "stddev_ns": 2841298, - "avg_ts": 231.821495, - "stddev_ts": 0.298194, - "samples_ns": [ - 2211431587, - 2208613543, - 2205749828 - ], - "samples_ts": [ - 231.524, - 231.82, - 232.121 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_type": "gemma3 270M Q2_K - Medium", - "model_size": 230552064, - "model_n_params": 268098176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:21:24Z", - "avg_ns": 10581687672, - "stddev_ns": 35920536, - "avg_ts": 48.385849, - "stddev_ts": 0.164573, - "samples_ns": [ - 10540213585, - 10601966460, - 10602882971 - ], - "samples_ts": [ - 48.5759, - 48.2929, - 48.2888 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-270M-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 215 - }, - { - "timestamp_utc": "2025-12-08T22:23:21.495932+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:22:02Z\",\n \"avg_ns\": 6893012133,\n \"stddev_ns\": 323399123,\n \"avg_ts\": 18.596101,\n \"stddev_ts\": 0.849490,\n \"samples_ns\": [ 6711480683, 7266393727, 6701161991 ],\n \"samples_ts\": [ 19.0718, 17.6153, 19.1012 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:22:29Z\",\n \"avg_ns\": 17101803483,\n \"stddev_ns\": 307608301,\n \"avg_ts\": 7.486189,\n \"stddev_ts\": 0.133317,\n \"samples_ns\": [ 17455657011, 16898163547, 16951589893 ],\n \"samples_ts\": [ 7.33287, 7.57479, 7.55091 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:22:02Z", - "avg_ns": 6893012133, - "stddev_ns": 323399123, - "avg_ts": 18.596101, - "stddev_ts": 0.84949, - "samples_ns": [ - 6711480683, - 7266393727, - 6701161991 - ], - "samples_ts": [ - 19.0718, - 17.6153, - 19.1012 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:22:29Z", - "avg_ns": 17101803483, - "stddev_ns": 307608301, - "avg_ts": 7.486189, - "stddev_ts": 0.133317, - "samples_ns": [ - 17455657011, - 16898163547, - 16951589893 - ], - "samples_ts": [ - 7.33287, - 7.57479, - 7.55091 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 216 - }, - { - "timestamp_utc": "2025-12-08T22:27:17.278660+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:23:22Z\",\n \"avg_ns\": 6916308559,\n \"stddev_ns\": 306180164,\n \"avg_ts\": 18.530590,\n \"stddev_ts\": 0.799919,\n \"samples_ns\": [ 6744689190, 7269805350, 6734431137 ],\n \"samples_ts\": [ 18.9779, 17.6071, 19.0068 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:23:49Z\",\n \"avg_ns\": 69102441542,\n \"stddev_ns\": 993948940,\n \"avg_ts\": 7.410307,\n \"stddev_ts\": 0.106116,\n \"samples_ns\": [ 68883735669, 70187530183, 68236058776 ],\n \"samples_ts\": [ 7.43281, 7.29474, 7.50336 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:23:22Z", - "avg_ns": 6916308559, - "stddev_ns": 306180164, - "avg_ts": 18.53059, - "stddev_ts": 0.799919, - "samples_ns": [ - 6744689190, - 7269805350, - 6734431137 - ], - "samples_ts": [ - 18.9779, - 17.6071, - 19.0068 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:23:49Z", - "avg_ns": 69102441542, - "stddev_ns": 993948940, - "avg_ts": 7.410307, - "stddev_ts": 0.106116, - "samples_ns": [ - 68883735669, - 70187530183, - 68236058776 - ], - "samples_ts": [ - 7.43281, - 7.29474, - 7.50336 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 217 - }, - { - "timestamp_utc": "2025-12-08T22:30:01.031337+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:27:18Z\",\n \"avg_ns\": 27785217750,\n \"stddev_ns\": 313141080,\n \"avg_ts\": 18.428635,\n \"stddev_ts\": 0.209052,\n \"samples_ns\": [ 27963896631, 27423641880, 27968114741 ],\n \"samples_ts\": [ 18.3093, 18.67, 18.3066 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:29:09Z\",\n \"avg_ns\": 17077206479,\n \"stddev_ns\": 384374519,\n \"avg_ts\": 7.497874,\n \"stddev_ts\": 0.166770,\n \"samples_ns\": [ 16907882358, 16806564757, 17517172322 ],\n \"samples_ts\": [ 7.57043, 7.61607, 7.30712 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:27:18Z", - "avg_ns": 27785217750, - "stddev_ns": 313141080, - "avg_ts": 18.428635, - "stddev_ts": 0.209052, - "samples_ns": [ - 27963896631, - 27423641880, - 27968114741 - ], - "samples_ts": [ - 18.3093, - 18.67, - 18.3066 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:29:09Z", - "avg_ns": 17077206479, - "stddev_ns": 384374519, - "avg_ts": 7.497874, - "stddev_ts": 0.16677, - "samples_ns": [ - 16907882358, - 16806564757, - 17517172322 - ], - "samples_ts": [ - 7.57043, - 7.61607, - 7.30712 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 218 - }, - { - "timestamp_utc": "2025-12-08T22:35:19.336992+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:30:02Z\",\n \"avg_ns\": 27170047853,\n \"stddev_ns\": 166832726,\n \"avg_ts\": 18.844756,\n \"stddev_ts\": 0.116069,\n \"samples_ns\": [ 27235668478, 27294092916, 26980382165 ],\n \"samples_ts\": [ 18.7989, 18.7586, 18.9768 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:31:50Z\",\n \"avg_ns\": 69491803158,\n \"stddev_ns\": 694416459,\n \"avg_ts\": 7.368263,\n \"stddev_ts\": 0.073233,\n \"samples_ns\": [ 70288088054, 69175280896, 69012040526 ],\n \"samples_ts\": [ 7.28431, 7.40149, 7.419 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:30:02Z", - "avg_ns": 27170047853, - "stddev_ns": 166832726, - "avg_ts": 18.844756, - "stddev_ts": 0.116069, - "samples_ns": [ - 27235668478, - 27294092916, - 26980382165 - ], - "samples_ts": [ - 18.7989, - 18.7586, - 18.9768 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:31:50Z", - "avg_ns": 69491803158, - "stddev_ns": 694416459, - "avg_ts": 7.368263, - "stddev_ts": 0.073233, - "samples_ns": [ - 70288088054, - 69175280896, - 69012040526 - ], - "samples_ts": [ - 7.28431, - 7.40149, - 7.419 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 219 - }, - { - "timestamp_utc": "2025-12-08T22:36:39.219347+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:35:20Z\",\n \"avg_ns\": 6714586702,\n \"stddev_ns\": 24869360,\n \"avg_ts\": 19.063150,\n \"stddev_ts\": 0.070582,\n \"samples_ns\": [ 6690519798, 6713052796, 6740187512 ],\n \"samples_ts\": [ 19.1315, 19.0673, 18.9906 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:35:47Z\",\n \"avg_ns\": 17172625603,\n \"stddev_ns\": 3133468105,\n \"avg_ts\": 7.463513,\n \"stddev_ts\": 0.326842,\n \"samples_ns\": [ 18063483191, 16721730433, 16732663186 ],\n \"samples_ts\": [ 7.08612, 7.65471, 7.64971 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:35:20Z", - "avg_ns": 6714586702, - "stddev_ns": 24869360, - "avg_ts": 19.06315, - "stddev_ts": 0.070582, - "samples_ns": [ - 6690519798, - 6713052796, - 6740187512 - ], - "samples_ts": [ - 19.1315, - 19.0673, - 18.9906 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:35:47Z", - "avg_ns": 17172625603, - "stddev_ns": 3133468105, - "avg_ts": 7.463513, - "stddev_ts": 0.326842, - "samples_ns": [ - 18063483191, - 16721730433, - 16732663186 - ], - "samples_ts": [ - 7.08612, - 7.65471, - 7.64971 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 220 - }, - { - "timestamp_utc": "2025-12-08T22:40:33.659742+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:36:39Z\",\n \"avg_ns\": 6741322625,\n \"stddev_ns\": 7244026,\n \"avg_ts\": 18.987387,\n \"stddev_ts\": 0.020391,\n \"samples_ns\": [ 6736521702, 6737791032, 6749655141 ],\n \"samples_ts\": [ 19.0009, 18.9973, 18.9639 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:37:06Z\",\n \"avg_ns\": 68840403239,\n \"stddev_ns\": 330388214,\n \"avg_ts\": 7.437607,\n \"stddev_ts\": 0.035793,\n \"samples_ns\": [ 69007566663, 69053804127, 68459838927 ],\n \"samples_ts\": [ 7.41948, 7.41451, 7.47884 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:36:39Z", - "avg_ns": 6741322625, - "stddev_ns": 7244026, - "avg_ts": 18.987387, - "stddev_ts": 0.020391, - "samples_ns": [ - 6736521702, - 6737791032, - 6749655141 - ], - "samples_ts": [ - 19.0009, - 18.9973, - 18.9639 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:37:06Z", - "avg_ns": 68840403239, - "stddev_ns": 330388214, - "avg_ts": 7.437607, - "stddev_ts": 0.035793, - "samples_ns": [ - 69007566663, - 69053804127, - 68459838927 - ], - "samples_ts": [ - 7.41948, - 7.41451, - 7.47884 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 221 - }, - { - "timestamp_utc": "2025-12-08T22:43:19.926901+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:40:34Z\",\n \"avg_ns\": 27463709398,\n \"stddev_ns\": 320556632,\n \"avg_ts\": 18.644466,\n \"stddev_ts\": 0.216164,\n \"samples_ns\": [ 27272874827, 27284457566, 27833795803 ],\n \"samples_ts\": [ 18.7732, 18.7653, 18.3949 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:42:25Z\",\n \"avg_ns\": 18146722237,\n \"stddev_ns\": 343355140,\n \"avg_ts\": 7.055317,\n \"stddev_ts\": 0.134890,\n \"samples_ns\": [ 18381543012, 18305965279, 17752658422 ],\n \"samples_ts\": [ 6.96351, 6.99226, 7.21019 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:40:34Z", - "avg_ns": 27463709398, - "stddev_ns": 320556632, - "avg_ts": 18.644466, - "stddev_ts": 0.216164, - "samples_ns": [ - 27272874827, - 27284457566, - 27833795803 - ], - "samples_ts": [ - 18.7732, - 18.7653, - 18.3949 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:42:25Z", - "avg_ns": 18146722237, - "stddev_ns": 343355140, - "avg_ts": 7.055317, - "stddev_ts": 0.13489, - "samples_ns": [ - 18381543012, - 18305965279, - 17752658422 - ], - "samples_ts": [ - 6.96351, - 6.99226, - 7.21019 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 222 - }, - { - "timestamp_utc": "2025-12-08T22:48:36.854705+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:43:20Z\",\n \"avg_ns\": 27710104300,\n \"stddev_ns\": 546211649,\n \"avg_ts\": 18.481817,\n \"stddev_ts\": 0.365478,\n \"samples_ns\": [ 28225878851, 27766588820, 27137845229 ],\n \"samples_ts\": [ 18.1394, 18.4394, 18.8666 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:45:11Z\",\n \"avg_ns\": 68531446819,\n \"stddev_ns\": 953620468,\n \"avg_ts\": 7.471984,\n \"stddev_ts\": 0.103673,\n \"samples_ns\": [ 69547021694, 67655084174, 68392234590 ],\n \"samples_ts\": [ 7.36193, 7.5678, 7.48623 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:43:20Z", - "avg_ns": 27710104300, - "stddev_ns": 546211649, - "avg_ts": 18.481817, - "stddev_ts": 0.365478, - "samples_ns": [ - 28225878851, - 27766588820, - 27137845229 - ], - "samples_ts": [ - 18.1394, - 18.4394, - 18.8666 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:45:11Z", - "avg_ns": 68531446819, - "stddev_ns": 953620468, - "avg_ts": 7.471984, - "stddev_ts": 0.103673, - "samples_ns": [ - 69547021694, - 67655084174, - 68392234590 - ], - "samples_ts": [ - 7.36193, - 7.5678, - 7.48623 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 223 - }, - { - "timestamp_utc": "2025-12-08T22:49:56.190269+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:48:37Z\",\n \"avg_ns\": 6716042022,\n \"stddev_ns\": 9613332,\n \"avg_ts\": 19.058870,\n \"stddev_ts\": 0.027276,\n \"samples_ns\": [ 6706812264, 6715316037, 6725997765 ],\n \"samples_ts\": [ 19.0851, 19.0609, 19.0306 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:49:04Z\",\n \"avg_ns\": 17166911214,\n \"stddev_ns\": 399149493,\n \"avg_ts\": 7.458928,\n \"stddev_ts\": 0.175722,\n \"samples_ns\": [ 17364599882, 16707498334, 17428635428 ],\n \"samples_ts\": [ 7.37132, 7.66123, 7.34424 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:48:37Z", - "avg_ns": 6716042022, - "stddev_ns": 9613332, - "avg_ts": 19.05887, - "stddev_ts": 0.027276, - "samples_ns": [ - 6706812264, - 6715316037, - 6725997765 - ], - "samples_ts": [ - 19.0851, - 19.0609, - 19.0306 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:49:04Z", - "avg_ns": 17166911214, - "stddev_ns": 399149493, - "avg_ts": 7.458928, - "stddev_ts": 0.175722, - "samples_ns": [ - 17364599882, - 16707498334, - 17428635428 - ], - "samples_ts": [ - 7.37132, - 7.66123, - 7.34424 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 224 - }, - { - "timestamp_utc": "2025-12-08T22:54:05.072244+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:49:56Z\",\n \"avg_ns\": 6893188321,\n \"stddev_ns\": 305708366,\n \"avg_ts\": 18.592827,\n \"stddev_ts\": 0.803994,\n \"samples_ns\": [ 6717714334, 6715662707, 7246187924 ],\n \"samples_ts\": [ 19.0541, 19.0599, 17.6645 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:50:24Z\",\n \"avg_ns\": 73317992090,\n \"stddev_ns\": 467279603,\n \"avg_ts\": 6.983468,\n \"stddev_ts\": 0.044365,\n \"samples_ns\": [ 73130114978, 73849968002, 72973893292 ],\n \"samples_ts\": [ 7.00122, 6.93298, 7.01621 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:49:56Z", - "avg_ns": 6893188321, - "stddev_ns": 305708366, - "avg_ts": 18.592827, - "stddev_ts": 0.803994, - "samples_ns": [ - 6717714334, - 6715662707, - 7246187924 - ], - "samples_ts": [ - 19.0541, - 19.0599, - 17.6645 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:50:24Z", - "avg_ns": 73317992090, - "stddev_ns": 467279603, - "avg_ts": 6.983468, - "stddev_ts": 0.044365, - "samples_ns": [ - 73130114978, - 73849968002, - 72973893292 - ], - "samples_ts": [ - 7.00122, - 6.93298, - 7.01621 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 225 - }, - { - "timestamp_utc": "2025-12-08T22:56:49.027808+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:54:05Z\",\n \"avg_ns\": 28126258388,\n \"stddev_ns\": 343987058,\n \"avg_ts\": 18.205433,\n \"stddev_ts\": 0.221131,\n \"samples_ns\": [ 28522401415, 27953288021, 27903085730 ],\n \"samples_ts\": [ 17.9508, 18.3163, 18.3492 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:55:58Z\",\n \"avg_ns\": 16852767552,\n \"stddev_ns\": 48133980,\n \"avg_ts\": 7.595233,\n \"stddev_ts\": 0.021676,\n \"samples_ns\": [ 16809716410, 16904736720, 16843849527 ],\n \"samples_ts\": [ 7.61464, 7.57184, 7.59921 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:54:05Z", - "avg_ns": 28126258388, - "stddev_ns": 343987058, - "avg_ts": 18.205433, - "stddev_ts": 0.221131, - "samples_ns": [ - 28522401415, - 27953288021, - 27903085730 - ], - "samples_ts": [ - 17.9508, - 18.3163, - 18.3492 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T22:55:58Z", - "avg_ns": 16852767552, - "stddev_ns": 48133980, - "avg_ts": 7.595233, - "stddev_ts": 0.021676, - "samples_ns": [ - 16809716410, - 16904736720, - 16843849527 - ], - "samples_ts": [ - 7.61464, - 7.57184, - 7.59921 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 226 - }, - { - "timestamp_utc": "2025-12-08T23:02:09.840267+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:56:49Z\",\n \"avg_ns\": 27828748130,\n \"stddev_ns\": 629402824,\n \"avg_ts\": 18.404436,\n \"stddev_ts\": 0.410895,\n \"samples_ns\": [ 28555407972, 27454371913, 27476464505 ],\n \"samples_ts\": [ 17.9301, 18.6491, 18.6341 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:58:41Z\",\n \"avg_ns\": 69459497698,\n \"stddev_ns\": 596057671,\n \"avg_ts\": 7.371565,\n \"stddev_ts\": 0.063438,\n \"samples_ns\": [ 69974763117, 69597036901, 68806693077 ],\n \"samples_ts\": [ 7.31692, 7.35664, 7.44114 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T22:56:49Z", - "avg_ns": 27828748130, - "stddev_ns": 629402824, - "avg_ts": 18.404436, - "stddev_ts": 0.410895, - "samples_ns": [ - 28555407972, - 27454371913, - 27476464505 - ], - "samples_ts": [ - 17.9301, - 18.6491, - 18.6341 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T22:58:41Z", - "avg_ns": 69459497698, - "stddev_ns": 596057671, - "avg_ts": 7.371565, - "stddev_ts": 0.063438, - "samples_ns": [ - 69974763117, - 69597036901, - 68806693077 - ], - "samples_ts": [ - 7.31692, - 7.35664, - 7.44114 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 227 - }, - { - "timestamp_utc": "2025-12-08T23:03:28.864381+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:02:10Z\",\n \"avg_ns\": 6716107213,\n \"stddev_ns\": 8397416,\n \"avg_ts\": 19.058679,\n \"stddev_ts\": 0.023844,\n \"samples_ns\": [ 6720337750, 6721547082, 6706436809 ],\n \"samples_ts\": [ 19.0467, 19.0432, 19.0861 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:02:37Z\",\n \"avg_ns\": 17052852439,\n \"stddev_ns\": 346622209,\n \"avg_ts\": 7.508129,\n \"stddev_ts\": 0.151568,\n \"samples_ns\": [ 16967828222, 17434075463, 16756653634 ],\n \"samples_ts\": [ 7.54369, 7.34194, 7.63876 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T23:02:10Z", - "avg_ns": 6716107213, - "stddev_ns": 8397416, - "avg_ts": 19.058679, - "stddev_ts": 0.023844, - "samples_ns": [ - 6720337750, - 6721547082, - 6706436809 - ], - "samples_ts": [ - 19.0467, - 19.0432, - 19.0861 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T23:02:37Z", - "avg_ns": 17052852439, - "stddev_ns": 346622209, - "avg_ts": 7.508129, - "stddev_ts": 0.151568, - "samples_ns": [ - 16967828222, - 17434075463, - 16756653634 - ], - "samples_ts": [ - 7.54369, - 7.34194, - 7.63876 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 228 - }, - { - "timestamp_utc": "2025-12-08T23:07:24.735592+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:03:29Z\",\n \"avg_ns\": 6923738974,\n \"stddev_ns\": 322076063,\n \"avg_ts\": 18.513132,\n \"stddev_ts\": 0.838742,\n \"samples_ns\": [ 6729149802, 6746562662, 7295504459 ],\n \"samples_ts\": [ 19.0217, 18.9726, 17.5451 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:03:57Z\",\n \"avg_ns\": 69028769105,\n \"stddev_ns\": 556584700,\n \"avg_ts\": 7.417518,\n \"stddev_ts\": 0.059739,\n \"samples_ns\": [ 68501671379, 68973864644, 69610771292 ],\n \"samples_ts\": [ 7.47427, 7.4231, 7.35518 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T23:03:29Z", - "avg_ns": 6923738974, - "stddev_ns": 322076063, - "avg_ts": 18.513132, - "stddev_ts": 0.838742, - "samples_ns": [ - 6729149802, - 6746562662, - 7295504459 - ], - "samples_ts": [ - 19.0217, - 18.9726, - 17.5451 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T23:03:57Z", - "avg_ns": 69028769105, - "stddev_ns": 556584700, - "avg_ts": 7.417518, - "stddev_ts": 0.059739, - "samples_ns": [ - 68501671379, - 68973864644, - 69610771292 - ], - "samples_ts": [ - 7.47427, - 7.4231, - 7.35518 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 229 - }, - { - "timestamp_utc": "2025-12-08T23:10:05.368875+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:07:25Z\",\n \"avg_ns\": 27190962773,\n \"stddev_ns\": 279858033,\n \"avg_ts\": 18.831108,\n \"stddev_ts\": 0.192671,\n \"samples_ns\": [ 27029733048, 27029040637, 27514114636 ],\n \"samples_ts\": [ 18.9421, 18.9426, 18.6086 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:09:14Z\",\n \"avg_ns\": 17020891730,\n \"stddev_ns\": 324985457,\n \"avg_ts\": 7.521978,\n \"stddev_ts\": 0.142054,\n \"samples_ns\": [ 16829792349, 16836751802, 17396131041 ],\n \"samples_ts\": [ 7.60556, 7.60242, 7.35796 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T23:07:25Z", - "avg_ns": 27190962773, - "stddev_ns": 279858033, - "avg_ts": 18.831108, - "stddev_ts": 0.192671, - "samples_ns": [ - 27029733048, - 27029040637, - 27514114636 - ], - "samples_ts": [ - 18.9421, - 18.9426, - 18.6086 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T23:09:14Z", - "avg_ns": 17020891730, - "stddev_ns": 324985457, - "avg_ts": 7.521978, - "stddev_ts": 0.142054, - "samples_ns": [ - 16829792349, - 16836751802, - 17396131041 - ], - "samples_ts": [ - 7.60556, - 7.60242, - 7.35796 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 230 - }, - { - "timestamp_utc": "2025-12-08T23:15:21.262227+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:10:06Z\",\n \"avg_ns\": 27331237871,\n \"stddev_ns\": 597762732,\n \"avg_ts\": 18.739045,\n \"stddev_ts\": 0.404736,\n \"samples_ns\": [ 26978183295, 26994116857, 28021413463 ],\n \"samples_ts\": [ 18.9783, 18.9671, 18.2717 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:11:55Z\",\n \"avg_ns\": 68649999295,\n \"stddev_ns\": 591879685,\n \"avg_ts\": 7.458489,\n \"stddev_ts\": 0.064071,\n \"samples_ns\": [ 69312623564, 68463663829, 68173710494 ],\n \"samples_ts\": [ 7.38682, 7.47842, 7.51023 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T23:10:06Z", - "avg_ns": 27331237871, - "stddev_ns": 597762732, - "avg_ts": 18.739045, - "stddev_ts": 0.404736, - "samples_ns": [ - 26978183295, - 26994116857, - 28021413463 - ], - "samples_ts": [ - 18.9783, - 18.9671, - 18.2717 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T23:11:55Z", - "avg_ns": 68649999295, - "stddev_ns": 591879685, - "avg_ts": 7.458489, - "stddev_ts": 0.064071, - "samples_ns": [ - 69312623564, - 68463663829, - 68173710494 - ], - "samples_ts": [ - 7.38682, - 7.47842, - 7.51023 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 231 - }, - { - "timestamp_utc": "2025-12-08T23:16:40.164131+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:15:22Z\",\n \"avg_ns\": 6731078486,\n \"stddev_ns\": 760730,\n \"avg_ts\": 19.016269,\n \"stddev_ts\": 0.002149,\n \"samples_ns\": [ 6730239911, 6731271269, 6731724278 ],\n \"samples_ts\": [ 19.0186, 19.0157, 19.0144 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:15:48Z\",\n \"avg_ns\": 17005008765,\n \"stddev_ns\": 358554789,\n \"avg_ts\": 7.529399,\n \"stddev_ts\": 0.156856,\n \"samples_ns\": [ 16807289331, 17418895157, 16788841807 ],\n \"samples_ts\": [ 7.61574, 7.34834, 7.62411 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T23:15:22Z", - "avg_ns": 6731078486, - "stddev_ns": 760730, - "avg_ts": 19.016269, - "stddev_ts": 0.002149, - "samples_ns": [ - 6730239911, - 6731271269, - 6731724278 - ], - "samples_ts": [ - 19.0186, - 19.0157, - 19.0144 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T23:15:48Z", - "avg_ns": 17005008765, - "stddev_ns": 358554789, - "avg_ts": 7.529399, - "stddev_ts": 0.156856, - "samples_ns": [ - 16807289331, - 17418895157, - 16788841807 - ], - "samples_ts": [ - 7.61574, - 7.34834, - 7.62411 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 232 - }, - { - "timestamp_utc": "2025-12-08T23:20:36.290065+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:16:41Z\",\n \"avg_ns\": 6890259634,\n \"stddev_ns\": 276482883,\n \"avg_ts\": 18.596459,\n \"stddev_ts\": 0.729333,\n \"samples_ns\": [ 7209483357, 6734512030, 6726783517 ],\n \"samples_ts\": [ 17.7544, 19.0066, 19.0284 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:17:08Z\",\n \"avg_ns\": 69069474952,\n \"stddev_ns\": 3276558287,\n \"avg_ts\": 7.414391,\n \"stddev_ts\": 0.131824,\n \"samples_ns\": [ 68994455890, 67878879761, 70335089206 ],\n \"samples_ts\": [ 7.42089, 7.54285, 7.27944 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T23:16:41Z", - "avg_ns": 6890259634, - "stddev_ns": 276482883, - "avg_ts": 18.596459, - "stddev_ts": 0.729333, - "samples_ns": [ - 7209483357, - 6734512030, - 6726783517 - ], - "samples_ts": [ - 17.7544, - 19.0066, - 19.0284 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T23:17:08Z", - "avg_ns": 69069474952, - "stddev_ns": 3276558287, - "avg_ts": 7.414391, - "stddev_ts": 0.131824, - "samples_ns": [ - 68994455890, - 67878879761, - 70335089206 - ], - "samples_ts": [ - 7.42089, - 7.54285, - 7.27944 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 233 - }, - { - "timestamp_utc": "2025-12-08T23:23:18.181203+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:20:37Z\",\n \"avg_ns\": 27516081635,\n \"stddev_ns\": 302051769,\n \"avg_ts\": 18.608803,\n \"stddev_ts\": 0.205264,\n \"samples_ns\": [ 27614482363, 27177099898, 27756662644 ],\n \"samples_ts\": [ 18.541, 18.8394, 18.446 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:22:26Z\",\n \"avg_ns\": 17055231949,\n \"stddev_ns\": 360095227,\n \"avg_ts\": 7.507233,\n \"stddev_ts\": 0.156651,\n \"samples_ns\": [ 16876797659, 16819196206, 17469701984 ],\n \"samples_ts\": [ 7.58438, 7.61035, 7.32697 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T23:20:37Z", - "avg_ns": 27516081635, - "stddev_ns": 302051769, - "avg_ts": 18.608803, - "stddev_ts": 0.205264, - "samples_ns": [ - 27614482363, - 27177099898, - 27756662644 - ], - "samples_ts": [ - 18.541, - 18.8394, - 18.446 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T23:22:26Z", - "avg_ns": 17055231949, - "stddev_ns": 360095227, - "avg_ts": 7.507233, - "stddev_ts": 0.156651, - "samples_ns": [ - 16876797659, - 16819196206, - 17469701984 - ], - "samples_ts": [ - 7.58438, - 7.61035, - 7.32697 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 234 - }, - { - "timestamp_utc": "2025-12-08T23:28:36.475703+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:23:18Z\",\n \"avg_ns\": 28111818839,\n \"stddev_ns\": 364560679,\n \"avg_ts\": 18.215037,\n \"stddev_ts\": 0.237870,\n \"samples_ns\": [ 28273792384, 28367325546, 27694338589 ],\n \"samples_ts\": [ 18.1086, 18.0489, 18.4875 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:25:11Z\",\n \"avg_ns\": 68419080592,\n \"stddev_ns\": 260371733,\n \"avg_ts\": 7.483365,\n \"stddev_ts\": 0.028523,\n \"samples_ns\": [ 68128356450, 68498088256, 68630797072 ],\n \"samples_ts\": [ 7.51523, 7.47466, 7.46021 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T23:23:18Z", - "avg_ns": 28111818839, - "stddev_ns": 364560679, - "avg_ts": 18.215037, - "stddev_ts": 0.23787, - "samples_ns": [ - 28273792384, - 28367325546, - 27694338589 - ], - "samples_ts": [ - 18.1086, - 18.0489, - 18.4875 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T23:25:11Z", - "avg_ns": 68419080592, - "stddev_ns": 260371733, - "avg_ts": 7.483365, - "stddev_ts": 0.028523, - "samples_ns": [ - 68128356450, - 68498088256, - 68630797072 - ], - "samples_ts": [ - 7.51523, - 7.47466, - 7.46021 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 235 - }, - { - "timestamp_utc": "2025-12-08T23:29:55.152989+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:28:37Z\",\n \"avg_ns\": 6737669330,\n \"stddev_ns\": 2401418,\n \"avg_ts\": 18.997669,\n \"stddev_ts\": 0.006770,\n \"samples_ns\": [ 6736227321, 6740441497, 6736339172 ],\n \"samples_ts\": [ 19.0017, 18.9899, 19.0014 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:29:04Z\",\n \"avg_ns\": 16920279799,\n \"stddev_ns\": 285204572,\n \"avg_ts\": 7.566308,\n \"stddev_ts\": 0.126442,\n \"samples_ns\": [ 16713210389, 16802041364, 17245587645 ],\n \"samples_ts\": [ 7.65861, 7.61812, 7.42219 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T23:28:37Z", - "avg_ns": 6737669330, - "stddev_ns": 2401418, - "avg_ts": 18.997669, - "stddev_ts": 0.00677, - "samples_ns": [ - 6736227321, - 6740441497, - 6736339172 - ], - "samples_ts": [ - 19.0017, - 18.9899, - 19.0014 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T23:29:04Z", - "avg_ns": 16920279799, - "stddev_ns": 285204572, - "avg_ts": 7.566308, - "stddev_ts": 0.126442, - "samples_ns": [ - 16713210389, - 16802041364, - 17245587645 - ], - "samples_ts": [ - 7.65861, - 7.61812, - 7.42219 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 236 - }, - { - "timestamp_utc": "2025-12-08T23:33:50.375708+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:29:55Z\",\n \"avg_ns\": 6897622321,\n \"stddev_ns\": 313231827,\n \"avg_ts\": 18.582013,\n \"stddev_ts\": 0.822281,\n \"samples_ns\": [ 6715560682, 7259308519, 6717997764 ],\n \"samples_ts\": [ 19.0602, 17.6325, 19.0533 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:30:23Z\",\n \"avg_ns\": 68936908217,\n \"stddev_ns\": 3279456757,\n \"avg_ts\": 7.428677,\n \"stddev_ts\": 0.133390,\n \"samples_ns\": [ 68941040371, 70172360509, 67697323772 ],\n \"samples_ts\": [ 7.42664, 7.29632, 7.56308 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T23:29:55Z", - "avg_ns": 6897622321, - "stddev_ns": 313231827, - "avg_ts": 18.582013, - "stddev_ts": 0.822281, - "samples_ns": [ - 6715560682, - 7259308519, - 6717997764 - ], - "samples_ts": [ - 19.0602, - 17.6325, - 19.0533 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T23:30:23Z", - "avg_ns": 68936908217, - "stddev_ns": 3279456757, - "avg_ts": 7.428677, - "stddev_ts": 0.13339, - "samples_ns": [ - 68941040371, - 70172360509, - 67697323772 - ], - "samples_ts": [ - 7.42664, - 7.29632, - 7.56308 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 237 - }, - { - "timestamp_utc": "2025-12-08T23:36:37.280751+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:33:51Z\",\n \"avg_ns\": 27973128124,\n \"stddev_ns\": 563236184,\n \"avg_ts\": 18.308202,\n \"stddev_ts\": 0.366676,\n \"samples_ns\": [ 28581945945, 27470626767, 27866811661 ],\n \"samples_ts\": [ 17.9134, 18.6381, 18.3731 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:35:43Z\",\n \"avg_ns\": 17794702865,\n \"stddev_ns\": 100207305,\n \"avg_ts\": 7.193304,\n \"stddev_ts\": 0.040408,\n \"samples_ns\": [ 17715153912, 17907247249, 17761707435 ],\n \"samples_ts\": [ 7.22545, 7.14794, 7.20651 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T23:33:51Z", - "avg_ns": 27973128124, - "stddev_ns": 563236184, - "avg_ts": 18.308202, - "stddev_ts": 0.366676, - "samples_ns": [ - 28581945945, - 27470626767, - 27866811661 - ], - "samples_ts": [ - 17.9134, - 18.6381, - 18.3731 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T23:35:43Z", - "avg_ns": 17794702865, - "stddev_ns": 100207305, - "avg_ts": 7.193304, - "stddev_ts": 0.040408, - "samples_ns": [ - 17715153912, - 17907247249, - 17761707435 - ], - "samples_ts": [ - 7.22545, - 7.14794, - 7.20651 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 238 - }, - { - "timestamp_utc": "2025-12-08T23:41:58.269352+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:36:38Z\",\n \"avg_ns\": 27678753623,\n \"stddev_ns\": 321910873,\n \"avg_ts\": 18.499600,\n \"stddev_ts\": 0.213721,\n \"samples_ns\": [ 28050419045, 27497940212, 27487901613 ],\n \"samples_ts\": [ 18.2528, 18.6196, 18.6264 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:38:28Z\",\n \"avg_ns\": 69820051855,\n \"stddev_ns\": 673409950,\n \"avg_ts\": 7.333592,\n \"stddev_ts\": 0.070843,\n \"samples_ns\": [ 70454901654, 69113775839, 69891478073 ],\n \"samples_ts\": [ 7.26706, 7.40807, 7.32564 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T23:36:38Z", - "avg_ns": 27678753623, - "stddev_ns": 321910873, - "avg_ts": 18.4996, - "stddev_ts": 0.213721, - "samples_ns": [ - 28050419045, - 27497940212, - 27487901613 - ], - "samples_ts": [ - 18.2528, - 18.6196, - 18.6264 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T23:38:28Z", - "avg_ns": 69820051855, - "stddev_ns": 673409950, - "avg_ts": 7.333592, - "stddev_ts": 0.070843, - "samples_ns": [ - 70454901654, - 69113775839, - 69891478073 - ], - "samples_ts": [ - 7.26706, - 7.40807, - 7.32564 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 239 - }, - { - "timestamp_utc": "2025-12-08T23:43:20.678854+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:41:59Z\",\n \"avg_ns\": 6941754814,\n \"stddev_ns\": 168373585,\n \"avg_ts\": 18.446436,\n \"stddev_ts\": 0.451139,\n \"samples_ns\": [ 7086940358, 6981146531, 6757177554 ],\n \"samples_ts\": [ 18.0614, 18.3351, 18.9428 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:42:27Z\",\n \"avg_ns\": 17765307713,\n \"stddev_ns\": 24307476,\n \"avg_ts\": 7.205063,\n \"stddev_ts\": 0.009856,\n \"samples_ns\": [ 17742187749, 17763086610, 17790648782 ],\n \"samples_ts\": [ 7.21444, 7.20595, 7.19479 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T23:41:59Z", - "avg_ns": 6941754814, - "stddev_ns": 168373585, - "avg_ts": 18.446436, - "stddev_ts": 0.451139, - "samples_ns": [ - 7086940358, - 6981146531, - 6757177554 - ], - "samples_ts": [ - 18.0614, - 18.3351, - 18.9428 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T23:42:27Z", - "avg_ns": 17765307713, - "stddev_ns": 24307476, - "avg_ts": 7.205063, - "stddev_ts": 0.009856, - "samples_ns": [ - 17742187749, - 17763086610, - 17790648782 - ], - "samples_ts": [ - 7.21444, - 7.20595, - 7.19479 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 240 - }, - { - "timestamp_utc": "2025-12-08T23:47:26.319758+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:43:21Z\",\n \"avg_ns\": 6727120625,\n \"stddev_ns\": 6257989,\n \"avg_ts\": 19.027468,\n \"stddev_ts\": 0.017691,\n \"samples_ns\": [ 6734325024, 6723033749, 6724003102 ],\n \"samples_ts\": [ 19.0071, 19.039, 19.0363 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:43:48Z\",\n \"avg_ns\": 72583566777,\n \"stddev_ns\": 23872651,\n \"avg_ts\": 7.053939,\n \"stddev_ts\": 0.002320,\n \"samples_ns\": [ 72557315263, 72603965897, 72589419173 ],\n \"samples_ts\": [ 7.05649, 7.05196, 7.05337 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T23:43:21Z", - "avg_ns": 6727120625, - "stddev_ns": 6257989, - "avg_ts": 19.027468, - "stddev_ts": 0.017691, - "samples_ns": [ - 6734325024, - 6723033749, - 6724003102 - ], - "samples_ts": [ - 19.0071, - 19.039, - 19.0363 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T23:43:48Z", - "avg_ns": 72583566777, - "stddev_ns": 23872651, - "avg_ts": 7.053939, - "stddev_ts": 0.00232, - "samples_ns": [ - 72557315263, - 72603965897, - 72589419173 - ], - "samples_ts": [ - 7.05649, - 7.05196, - 7.05337 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 241 - }, - { - "timestamp_utc": "2025-12-08T23:50:07.220138+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:47:27Z\",\n \"avg_ns\": 27189126685,\n \"stddev_ns\": 304625552,\n \"avg_ts\": 18.832623,\n \"stddev_ts\": 0.209668,\n \"samples_ns\": [ 26994542448, 27540189234, 27032648374 ],\n \"samples_ts\": [ 18.9668, 18.591, 18.9401 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:49:15Z\",\n \"avg_ns\": 17123985217,\n \"stddev_ns\": 414906192,\n \"avg_ts\": 7.477782,\n \"stddev_ts\": 0.178716,\n \"samples_ns\": [ 16906724447, 17602406669, 16862824537 ],\n \"samples_ts\": [ 7.57095, 7.27173, 7.59066 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T23:47:27Z", - "avg_ns": 27189126685, - "stddev_ns": 304625552, - "avg_ts": 18.832623, - "stddev_ts": 0.209668, - "samples_ns": [ - 26994542448, - 27540189234, - 27032648374 - ], - "samples_ts": [ - 18.9668, - 18.591, - 18.9401 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T23:49:15Z", - "avg_ns": 17123985217, - "stddev_ns": 414906192, - "avg_ts": 7.477782, - "stddev_ts": 0.178716, - "samples_ns": [ - 16906724447, - 17602406669, - 16862824537 - ], - "samples_ts": [ - 7.57095, - 7.27173, - 7.59066 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 242 - }, - { - "timestamp_utc": "2025-12-08T23:55:24.340092+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:50:08Z\",\n \"avg_ns\": 27344645593,\n \"stddev_ns\": 326730968,\n \"avg_ts\": 18.725753,\n \"stddev_ts\": 0.225301,\n \"samples_ns\": [ 27533205534, 26967369179, 27533362066 ],\n \"samples_ts\": [ 18.5957, 18.9859, 18.5956 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:51:57Z\",\n \"avg_ns\": 69026611951,\n \"stddev_ns\": 607029909,\n \"avg_ts\": 7.417810,\n \"stddev_ts\": 0.064904,\n \"samples_ns\": [ 68687598698, 69727426230, 68664810925 ],\n \"samples_ts\": [ 7.45404, 7.34288, 7.45651 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T23:50:08Z", - "avg_ns": 27344645593, - "stddev_ns": 326730968, - "avg_ts": 18.725753, - "stddev_ts": 0.225301, - "samples_ns": [ - 27533205534, - 26967369179, - 27533362066 - ], - "samples_ts": [ - 18.5957, - 18.9859, - 18.5956 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T23:51:57Z", - "avg_ns": 69026611951, - "stddev_ns": 607029909, - "avg_ts": 7.41781, - "stddev_ts": 0.064904, - "samples_ns": [ - 68687598698, - 69727426230, - 68664810925 - ], - "samples_ts": [ - 7.45404, - 7.34288, - 7.45651 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 243 - }, - { - "timestamp_utc": "2025-12-08T23:56:46.162770+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:55:25Z\",\n \"avg_ns\": 6896039558,\n \"stddev_ns\": 316363417,\n \"avg_ts\": 18.586784,\n \"stddev_ts\": 0.830692,\n \"samples_ns\": [ 7261335831, 6711203572, 6715579271 ],\n \"samples_ts\": [ 17.6276, 19.0726, 19.0602 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:55:52Z\",\n \"avg_ns\": 17800858044,\n \"stddev_ns\": 55331492,\n \"avg_ts\": 7.190711,\n \"stddev_ts\": 0.022383,\n \"samples_ns\": [ 17844109763, 17738507437, 17819956934 ],\n \"samples_ts\": [ 7.17324, 7.21594, 7.18296 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T23:55:25Z", - "avg_ns": 6896039558, - "stddev_ns": 316363417, - "avg_ts": 18.586784, - "stddev_ts": 0.830692, - "samples_ns": [ - 7261335831, - 6711203572, - 6715579271 - ], - "samples_ts": [ - 17.6276, - 19.0726, - 19.0602 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-08T23:55:52Z", - "avg_ns": 17800858044, - "stddev_ns": 55331492, - "avg_ts": 7.190711, - "stddev_ts": 0.022383, - "samples_ns": [ - 17844109763, - 17738507437, - 17819956934 - ], - "samples_ts": [ - 7.17324, - 7.21594, - 7.18296 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 244 - }, - { - "timestamp_utc": "2025-12-09T00:00:53.904541+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:56:46Z\",\n \"avg_ns\": 6710021926,\n \"stddev_ns\": 8765076,\n \"avg_ts\": 19.075965,\n \"stddev_ts\": 0.024899,\n \"samples_ns\": [ 6704434522, 6720123538, 6705507719 ],\n \"samples_ts\": [ 19.0918, 19.0473, 19.0888 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:57:13Z\",\n \"avg_ns\": 73310938622,\n \"stddev_ns\": 60223795,\n \"avg_ts\": 6.983954,\n \"stddev_ts\": 0.005734,\n \"samples_ns\": [ 73380393063, 73273241653, 73279181152 ],\n \"samples_ts\": [ 6.97734, 6.98754, 6.98698 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-08T23:56:46Z", - "avg_ns": 6710021926, - "stddev_ns": 8765076, - "avg_ts": 19.075965, - "stddev_ts": 0.024899, - "samples_ns": [ - 6704434522, - 6720123538, - 6705507719 - ], - "samples_ts": [ - 19.0918, - 19.0473, - 19.0888 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-08T23:57:13Z", - "avg_ns": 73310938622, - "stddev_ns": 60223795, - "avg_ts": 6.983954, - "stddev_ts": 0.005734, - "samples_ns": [ - 73380393063, - 73273241653, - 73279181152 - ], - "samples_ts": [ - 6.97734, - 6.98754, - 6.98698 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 245 - }, - { - "timestamp_utc": "2025-12-09T00:03:41.040716+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:00:54Z\",\n \"avg_ns\": 27725003129,\n \"stddev_ns\": 538284616,\n \"avg_ts\": 18.471721,\n \"stddev_ts\": 0.358147,\n \"samples_ns\": [ 28276962935, 27201524635, 27696521819 ],\n \"samples_ts\": [ 18.1066, 18.8225, 18.4861 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:02:45Z\",\n \"avg_ns\": 18384353019,\n \"stddev_ns\": 3088685363,\n \"avg_ts\": 6.966772,\n \"stddev_ts\": 0.212209,\n \"samples_ns\": [ 18976692428, 17856978381, 18319388250 ],\n \"samples_ts\": [ 6.74512, 7.16807, 6.98713 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:00:54Z", - "avg_ns": 27725003129, - "stddev_ns": 538284616, - "avg_ts": 18.471721, - "stddev_ts": 0.358147, - "samples_ns": [ - 28276962935, - 27201524635, - 27696521819 - ], - "samples_ts": [ - 18.1066, - 18.8225, - 18.4861 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T00:02:45Z", - "avg_ns": 18384353019, - "stddev_ns": 3088685363, - "avg_ts": 6.966772, - "stddev_ts": 0.212209, - "samples_ns": [ - 18976692428, - 17856978381, - 18319388250 - ], - "samples_ts": [ - 6.74512, - 7.16807, - 6.98713 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 246 - }, - { - "timestamp_utc": "2025-12-09T00:09:02.935028+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:03:41Z\",\n \"avg_ns\": 28539680160,\n \"stddev_ns\": 316847669,\n \"avg_ts\": 17.941417,\n \"stddev_ts\": 0.200210,\n \"samples_ns\": [ 28786672324, 28649933026, 28182435131 ],\n \"samples_ts\": [ 17.786, 17.8709, 18.1673 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:05:36Z\",\n \"avg_ns\": 68852917916,\n \"stddev_ns\": 295252626,\n \"avg_ts\": 7.436232,\n \"stddev_ts\": 0.031839,\n \"samples_ns\": [ 69178686686, 68602969503, 68777097559 ],\n \"samples_ts\": [ 7.40112, 7.46323, 7.44434 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:03:41Z", - "avg_ns": 28539680160, - "stddev_ns": 316847669, - "avg_ts": 17.941417, - "stddev_ts": 0.20021, - "samples_ns": [ - 28786672324, - 28649933026, - 28182435131 - ], - "samples_ts": [ - 17.786, - 17.8709, - 18.1673 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T00:05:36Z", - "avg_ns": 68852917916, - "stddev_ns": 295252626, - "avg_ts": 7.436232, - "stddev_ts": 0.031839, - "samples_ns": [ - 69178686686, - 68602969503, - 68777097559 - ], - "samples_ts": [ - 7.40112, - 7.46323, - 7.44434 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 247 - }, - { - "timestamp_utc": "2025-12-09T00:10:24.731314+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:09:03Z\",\n \"avg_ns\": 6746719899,\n \"stddev_ns\": 29149378,\n \"avg_ts\": 18.972418,\n \"stddev_ts\": 0.081975,\n \"samples_ns\": [ 6746922968, 6717469633, 6775767097 ],\n \"samples_ts\": [ 18.9716, 19.0548, 18.8909 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:09:30Z\",\n \"avg_ns\": 17773478952,\n \"stddev_ns\": 11093616,\n \"avg_ts\": 7.201743,\n \"stddev_ts\": 0.004493,\n \"samples_ns\": [ 17768956489, 17786117721, 17765362648 ],\n \"samples_ts\": [ 7.20357, 7.19662, 7.20503 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:09:03Z", - "avg_ns": 6746719899, - "stddev_ns": 29149378, - "avg_ts": 18.972418, - "stddev_ts": 0.081975, - "samples_ns": [ - 6746922968, - 6717469633, - 6775767097 - ], - "samples_ts": [ - 18.9716, - 19.0548, - 18.8909 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T00:09:30Z", - "avg_ns": 17773478952, - "stddev_ns": 11093616, - "avg_ts": 7.201743, - "stddev_ts": 0.004493, - "samples_ns": [ - 17768956489, - 17786117721, - 17765362648 - ], - "samples_ts": [ - 7.20357, - 7.19662, - 7.20503 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 248 - }, - { - "timestamp_utc": "2025-12-09T00:14:31.511618+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:10:25Z\",\n \"avg_ns\": 6891901738,\n \"stddev_ns\": 305342452,\n \"avg_ts\": 18.596255,\n \"stddev_ts\": 0.803491,\n \"samples_ns\": [ 7244223484, 6727405068, 6704076662 ],\n \"samples_ts\": [ 17.6693, 19.0267, 19.0929 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:10:52Z\",\n \"avg_ns\": 72796480370,\n \"stddev_ns\": 771047190,\n \"avg_ts\": 7.033836,\n \"stddev_ts\": 0.074849,\n \"samples_ns\": [ 73047322481, 73410871755, 71931246874 ],\n \"samples_ts\": [ 7.00915, 6.97444, 7.11791 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:10:25Z", - "avg_ns": 6891901738, - "stddev_ns": 305342452, - "avg_ts": 18.596255, - "stddev_ts": 0.803491, - "samples_ns": [ - 7244223484, - 6727405068, - 6704076662 - ], - "samples_ts": [ - 17.6693, - 19.0267, - 19.0929 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T00:10:52Z", - "avg_ns": 72796480370, - "stddev_ns": 771047190, - "avg_ts": 7.033836, - "stddev_ts": 0.074849, - "samples_ns": [ - 73047322481, - 73410871755, - 71931246874 - ], - "samples_ts": [ - 7.00915, - 6.97444, - 7.11791 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 249 - }, - { - "timestamp_utc": "2025-12-09T00:17:13.641314+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:14:32Z\",\n \"avg_ns\": 27679281469,\n \"stddev_ns\": 322730172,\n \"avg_ts\": 18.499255,\n \"stddev_ts\": 0.214255,\n \"samples_ns\": [ 28051853331, 27486113339, 27499877739 ],\n \"samples_ts\": [ 18.2519, 18.6276, 18.6183 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:16:22Z\",\n \"avg_ns\": 16850454104,\n \"stddev_ns\": 119698394,\n \"avg_ts\": 7.596489,\n \"stddev_ts\": 0.053775,\n \"samples_ns\": [ 16804359683, 16986347156, 16760655473 ],\n \"samples_ts\": [ 7.61707, 7.53546, 7.63693 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:14:32Z", - "avg_ns": 27679281469, - "stddev_ns": 322730172, - "avg_ts": 18.499255, - "stddev_ts": 0.214255, - "samples_ns": [ - 28051853331, - 27486113339, - 27499877739 - ], - "samples_ts": [ - 18.2519, - 18.6276, - 18.6183 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T00:16:22Z", - "avg_ns": 16850454104, - "stddev_ns": 119698394, - "avg_ts": 7.596489, - "stddev_ts": 0.053775, - "samples_ns": [ - 16804359683, - 16986347156, - 16760655473 - ], - "samples_ts": [ - 7.61707, - 7.53546, - 7.63693 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 250 - }, - { - "timestamp_utc": "2025-12-09T00:22:36.814940+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:17:14Z\",\n \"avg_ns\": 27871515650,\n \"stddev_ns\": 645488493,\n \"avg_ts\": 18.376494,\n \"stddev_ts\": 0.420019,\n \"samples_ns\": [ 27526090745, 27472243294, 28616212911 ],\n \"samples_ts\": [ 18.6005, 18.637, 17.892 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:19:06Z\",\n \"avg_ns\": 70169104515,\n \"stddev_ns\": 1293926743,\n \"avg_ts\": 7.298324,\n \"stddev_ts\": 0.135496,\n \"samples_ns\": [ 71264212776, 70501777459, 68741323312 ],\n \"samples_ts\": [ 7.18453, 7.26223, 7.44821 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:17:14Z", - "avg_ns": 27871515650, - "stddev_ns": 645488493, - "avg_ts": 18.376494, - "stddev_ts": 0.420019, - "samples_ns": [ - 27526090745, - 27472243294, - 28616212911 - ], - "samples_ts": [ - 18.6005, - 18.637, - 17.892 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T00:19:06Z", - "avg_ns": 70169104515, - "stddev_ns": 1293926743, - "avg_ts": 7.298324, - "stddev_ts": 0.135496, - "samples_ns": [ - 71264212776, - 70501777459, - 68741323312 - ], - "samples_ts": [ - 7.18453, - 7.26223, - 7.44821 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 251 - }, - { - "timestamp_utc": "2025-12-09T00:23:19.488217+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:22:37Z\",\n \"avg_ns\": 3433192864,\n \"stddev_ns\": 10666739,\n \"avg_ts\": 37.283318,\n \"stddev_ts\": 0.115657,\n \"samples_ns\": [ 3425291475, 3428960932, 3445326185 ],\n \"samples_ts\": [ 37.3691, 37.3291, 37.1518 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:22:51Z\",\n \"avg_ns\": 9341627288,\n \"stddev_ns\": 371337226,\n \"avg_ts\": 13.716248,\n \"stddev_ts\": 0.533530,\n \"samples_ns\": [ 9163941712, 9768423023, 9092517131 ],\n \"samples_ts\": [ 13.9678, 13.1034, 14.0775 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:22:37Z", - "avg_ns": 3433192864, - "stddev_ns": 10666739, - "avg_ts": 37.283318, - "stddev_ts": 0.115657, - "samples_ns": [ - 3425291475, - 3428960932, - 3445326185 - ], - "samples_ts": [ - 37.3691, - 37.3291, - 37.1518 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T00:22:51Z", - "avg_ns": 9341627288, - "stddev_ns": 371337226, - "avg_ts": 13.716248, - "stddev_ts": 0.53353, - "samples_ns": [ - 9163941712, - 9768423023, - 9092517131 - ], - "samples_ts": [ - 13.9678, - 13.1034, - 14.0775 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 252 - }, - { - "timestamp_utc": "2025-12-09T00:25:26.893704+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:23:20Z\",\n \"avg_ns\": 3620638555,\n \"stddev_ns\": 335161992,\n \"avg_ts\": 35.545664,\n \"stddev_ts\": 3.123892,\n \"samples_ns\": [ 4007564330, 3420112538, 3434238799 ],\n \"samples_ts\": [ 31.9396, 37.4257, 37.2717 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:23:34Z\",\n \"avg_ns\": 37384249779,\n \"stddev_ns\": 417895062,\n \"avg_ts\": 13.696756,\n \"stddev_ts\": 0.154100,\n \"samples_ns\": [ 37634028604, 36901807417, 37616913318 ],\n \"samples_ts\": [ 13.6047, 13.8747, 13.6109 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:23:20Z", - "avg_ns": 3620638555, - "stddev_ns": 335161992, - "avg_ts": 35.545664, - "stddev_ts": 3.123892, - "samples_ns": [ - 4007564330, - 3420112538, - 3434238799 - ], - "samples_ts": [ - 31.9396, - 37.4257, - 37.2717 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T00:23:34Z", - "avg_ns": 37384249779, - "stddev_ns": 417895062, - "avg_ts": 13.696756, - "stddev_ts": 0.1541, - "samples_ns": [ - 37634028604, - 36901807417, - 37616913318 - ], - "samples_ts": [ - 13.6047, - 13.8747, - 13.6109 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 253 - }, - { - "timestamp_utc": "2025-12-09T00:26:50.476906+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:25:27Z\",\n \"avg_ns\": 13811003944,\n \"stddev_ns\": 17570736,\n \"avg_ts\": 37.071929,\n \"stddev_ts\": 0.047136,\n \"samples_ns\": [ 13830775229, 13805060279, 13797176325 ],\n \"samples_ts\": [ 37.0189, 37.0878, 37.109 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:26:22Z\",\n \"avg_ns\": 9117491617,\n \"stddev_ns\": 29322747,\n \"avg_ts\": 14.039046,\n \"stddev_ts\": 0.045231,\n \"samples_ns\": [ 9083793054, 9137190382, 9131491416 ],\n \"samples_ts\": [ 14.091, 14.0087, 14.0174 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:25:27Z", - "avg_ns": 13811003944, - "stddev_ns": 17570736, - "avg_ts": 37.071929, - "stddev_ts": 0.047136, - "samples_ns": [ - 13830775229, - 13805060279, - 13797176325 - ], - "samples_ts": [ - 37.0189, - 37.0878, - 37.109 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T00:26:22Z", - "avg_ns": 9117491617, - "stddev_ns": 29322747, - "avg_ts": 14.039046, - "stddev_ts": 0.045231, - "samples_ns": [ - 9083793054, - 9137190382, - 9131491416 - ], - "samples_ts": [ - 14.091, - 14.0087, - 14.0174 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 254 - }, - { - "timestamp_utc": "2025-12-09T00:29:39.496136+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:26:51Z\",\n \"avg_ns\": 13778237633,\n \"stddev_ns\": 17846811,\n \"avg_ts\": 37.160092,\n \"stddev_ts\": 0.048154,\n \"samples_ns\": [ 13782683652, 13758588506, 13793440742 ],\n \"samples_ts\": [ 37.1481, 37.2131, 37.1191 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:27:46Z\",\n \"avg_ns\": 37660894476,\n \"stddev_ns\": 43159004,\n \"avg_ts\": 13.595016,\n \"stddev_ts\": 0.015585,\n \"samples_ns\": [ 37670847285, 37698207175, 37613628969 ],\n \"samples_ts\": [ 13.5914, 13.5815, 13.6121 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:26:51Z", - "avg_ns": 13778237633, - "stddev_ns": 17846811, - "avg_ts": 37.160092, - "stddev_ts": 0.048154, - "samples_ns": [ - 13782683652, - 13758588506, - 13793440742 - ], - "samples_ts": [ - 37.1481, - 37.2131, - 37.1191 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T00:27:46Z", - "avg_ns": 37660894476, - "stddev_ns": 43159004, - "avg_ts": 13.595016, - "stddev_ts": 0.015585, - "samples_ns": [ - 37670847285, - 37698207175, - 37613628969 - ], - "samples_ts": [ - 13.5914, - 13.5815, - 13.6121 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 255 - }, - { - "timestamp_utc": "2025-12-09T00:30:21.651320+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:29:40Z\",\n \"avg_ns\": 3429832438,\n \"stddev_ns\": 8304445,\n \"avg_ts\": 37.319753,\n \"stddev_ts\": 0.090471,\n \"samples_ns\": [ 3420336853, 3435734503, 3433425960 ],\n \"samples_ts\": [ 37.4232, 37.2555, 37.2805 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:29:53Z\",\n \"avg_ns\": 9169744921,\n \"stddev_ns\": 38070101,\n \"avg_ts\": 13.959109,\n \"stddev_ts\": 0.058025,\n \"samples_ns\": [ 9203310364, 9128378599, 9177545800 ],\n \"samples_ts\": [ 13.908, 14.0222, 13.9471 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:29:40Z", - "avg_ns": 3429832438, - "stddev_ns": 8304445, - "avg_ts": 37.319753, - "stddev_ts": 0.090471, - "samples_ns": [ - 3420336853, - 3435734503, - 3433425960 - ], - "samples_ts": [ - 37.4232, - 37.2555, - 37.2805 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T00:29:53Z", - "avg_ns": 9169744921, - "stddev_ns": 38070101, - "avg_ts": 13.959109, - "stddev_ts": 0.058025, - "samples_ns": [ - 9203310364, - 9128378599, - 9177545800 - ], - "samples_ts": [ - 13.908, - 14.0222, - 13.9471 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 256 - }, - { - "timestamp_utc": "2025-12-09T00:32:28.548290+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:30:22Z\",\n \"avg_ns\": 3429073822,\n \"stddev_ns\": 8802524,\n \"avg_ts\": 37.328027,\n \"stddev_ts\": 0.095750,\n \"samples_ns\": [ 3427392670, 3421233508, 3438595290 ],\n \"samples_ts\": [ 37.3462, 37.4134, 37.2245 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:30:36Z\",\n \"avg_ns\": 37239670945,\n \"stddev_ns\": 371964037,\n \"avg_ts\": 13.749699,\n \"stddev_ts\": 0.138126,\n \"samples_ns\": [ 37472081479, 37436269476, 36810661881 ],\n \"samples_ts\": [ 13.6635, 13.6766, 13.909 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:30:22Z", - "avg_ns": 3429073822, - "stddev_ns": 8802524, - "avg_ts": 37.328027, - "stddev_ts": 0.09575, - "samples_ns": [ - 3427392670, - 3421233508, - 3438595290 - ], - "samples_ts": [ - 37.3462, - 37.4134, - 37.2245 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T00:30:36Z", - "avg_ns": 37239670945, - "stddev_ns": 371964037, - "avg_ts": 13.749699, - "stddev_ts": 0.138126, - "samples_ns": [ - 37472081479, - 37436269476, - 36810661881 - ], - "samples_ts": [ - 13.6635, - 13.6766, - 13.909 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 257 - }, - { - "timestamp_utc": "2025-12-09T00:33:54.714102+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:32:29Z\",\n \"avg_ns\": 13836377036,\n \"stddev_ns\": 6300398,\n \"avg_ts\": 37.003911,\n \"stddev_ts\": 0.016851,\n \"samples_ns\": [ 13837187980, 13829710430, 13842232698 ],\n \"samples_ts\": [ 37.0017, 37.0217, 36.9883 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:33:25Z\",\n \"avg_ns\": 9779473564,\n \"stddev_ns\": 359072835,\n \"avg_ts\": 13.100169,\n \"stddev_ts\": 0.471042,\n \"samples_ns\": [ 9581042750, 10193970117, 9563407826 ],\n \"samples_ts\": [ 13.3597, 12.5564, 13.3844 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:32:29Z", - "avg_ns": 13836377036, - "stddev_ns": 6300398, - "avg_ts": 37.003911, - "stddev_ts": 0.016851, - "samples_ns": [ - 13837187980, - 13829710430, - 13842232698 - ], - "samples_ts": [ - 37.0017, - 37.0217, - 36.9883 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T00:33:25Z", - "avg_ns": 9779473564, - "stddev_ns": 359072835, - "avg_ts": 13.100169, - "stddev_ts": 0.471042, - "samples_ns": [ - 9581042750, - 10193970117, - 9563407826 - ], - "samples_ts": [ - 13.3597, - 12.5564, - 13.3844 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 258 - }, - { - "timestamp_utc": "2025-12-09T00:36:43.842221+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:33:55Z\",\n \"avg_ns\": 14320724162,\n \"stddev_ns\": 302581303,\n \"avg_ts\": 35.762898,\n \"stddev_ts\": 0.746575,\n \"samples_ns\": [ 14156671342, 14669903003, 14135598143 ],\n \"samples_ts\": [ 36.1667, 34.9014, 36.2206 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:34:53Z\",\n \"avg_ns\": 36831170653,\n \"stddev_ns\": 62882365,\n \"avg_ts\": 13.901296,\n \"stddev_ts\": 0.023728,\n \"samples_ns\": [ 36896626889, 36825658877, 36771226195 ],\n \"samples_ts\": [ 13.8766, 13.9033, 13.9239 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:33:55Z", - "avg_ns": 14320724162, - "stddev_ns": 302581303, - "avg_ts": 35.762898, - "stddev_ts": 0.746575, - "samples_ns": [ - 14156671342, - 14669903003, - 14135598143 - ], - "samples_ts": [ - 36.1667, - 34.9014, - 36.2206 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T00:34:53Z", - "avg_ns": 36831170653, - "stddev_ns": 62882365, - "avg_ts": 13.901296, - "stddev_ts": 0.023728, - "samples_ns": [ - 36896626889, - 36825658877, - 36771226195 - ], - "samples_ts": [ - 13.8766, - 13.9033, - 13.9239 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 259 - }, - { - "timestamp_utc": "2025-12-09T00:37:27.166009+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:36:44Z\",\n \"avg_ns\": 3628845450,\n \"stddev_ns\": 316460952,\n \"avg_ts\": 35.444028,\n \"stddev_ts\": 2.943026,\n \"samples_ns\": [ 3451665631, 3994207856, 3440662864 ],\n \"samples_ts\": [ 37.0835, 32.0464, 37.2021 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:36:58Z\",\n \"avg_ns\": 9354416654,\n \"stddev_ns\": 330446033,\n \"avg_ts\": 13.694541,\n \"stddev_ts\": 0.474108,\n \"samples_ns\": [ 9169720599, 9735918772, 9157610593 ],\n \"samples_ts\": [ 13.959, 13.1472, 13.9774 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:36:44Z", - "avg_ns": 3628845450, - "stddev_ns": 316460952, - "avg_ts": 35.444028, - "stddev_ts": 2.943026, - "samples_ns": [ - 3451665631, - 3994207856, - 3440662864 - ], - "samples_ts": [ - 37.0835, - 32.0464, - 37.2021 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T00:36:58Z", - "avg_ns": 9354416654, - "stddev_ns": 330446033, - "avg_ts": 13.694541, - "stddev_ts": 0.474108, - "samples_ns": [ - 9169720599, - 9735918772, - 9157610593 - ], - "samples_ts": [ - 13.959, - 13.1472, - 13.9774 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 260 - }, - { - "timestamp_utc": "2025-12-09T00:39:35.349252+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:37:27Z\",\n \"avg_ns\": 3610594558,\n \"stddev_ns\": 324238537,\n \"avg_ts\": 35.633396,\n \"stddev_ts\": 3.042915,\n \"samples_ns\": [ 3413845405, 3984827704, 3433110567 ],\n \"samples_ts\": [ 37.4944, 32.1218, 37.284 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:37:42Z\",\n \"avg_ns\": 37666696924,\n \"stddev_ns\": 380826102,\n \"avg_ts\": 13.593831,\n \"stddev_ts\": 0.136699,\n \"samples_ns\": [ 38102938008, 37496522266, 37400630498 ],\n \"samples_ts\": [ 13.4373, 13.6546, 13.6896 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:37:27Z", - "avg_ns": 3610594558, - "stddev_ns": 324238537, - "avg_ts": 35.633396, - "stddev_ts": 3.042915, - "samples_ns": [ - 3413845405, - 3984827704, - 3433110567 - ], - "samples_ts": [ - 37.4944, - 32.1218, - 37.284 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T00:37:42Z", - "avg_ns": 37666696924, - "stddev_ns": 380826102, - "avg_ts": 13.593831, - "stddev_ts": 0.136699, - "samples_ns": [ - 38102938008, - 37496522266, - 37400630498 - ], - "samples_ts": [ - 13.4373, - 13.6546, - 13.6896 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 261 - }, - { - "timestamp_utc": "2025-12-09T00:41:01.426950+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:39:36Z\",\n \"avg_ns\": 13994182500,\n \"stddev_ns\": 11119395,\n \"avg_ts\": 36.586647,\n \"stddev_ts\": 0.029070,\n \"samples_ns\": [ 14005113878, 13994548252, 13982885371 ],\n \"samples_ts\": [ 36.5581, 36.5857, 36.6162 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:40:32Z\",\n \"avg_ns\": 9530442067,\n \"stddev_ns\": 56417366,\n \"avg_ts\": 13.430959,\n \"stddev_ts\": 0.079237,\n \"samples_ns\": [ 9595569969, 9499176770, 9496579462 ],\n \"samples_ts\": [ 13.3395, 13.4749, 13.4785 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:39:36Z", - "avg_ns": 13994182500, - "stddev_ns": 11119395, - "avg_ts": 36.586647, - "stddev_ts": 0.02907, - "samples_ns": [ - 14005113878, - 13994548252, - 13982885371 - ], - "samples_ts": [ - 36.5581, - 36.5857, - 36.6162 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T00:40:32Z", - "avg_ns": 9530442067, - "stddev_ns": 56417366, - "avg_ts": 13.430959, - "stddev_ts": 0.079237, - "samples_ns": [ - 9595569969, - 9499176770, - 9496579462 - ], - "samples_ts": [ - 13.3395, - 13.4749, - 13.4785 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 262 - }, - { - "timestamp_utc": "2025-12-09T00:43:51.987760+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:41:02Z\",\n \"avg_ns\": 13957167273,\n \"stddev_ns\": 21616399,\n \"avg_ts\": 36.683720,\n \"stddev_ts\": 0.056831,\n \"samples_ns\": [ 13934193430, 13977103615, 13960204776 ],\n \"samples_ts\": [ 36.7441, 36.6313, 36.6757 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:41:58Z\",\n \"avg_ns\": 37919115560,\n \"stddev_ns\": 447976689,\n \"avg_ts\": 13.503688,\n \"stddev_ts\": 0.160461,\n \"samples_ns\": [ 38256793203, 37410919300, 38089634177 ],\n \"samples_ts\": [ 13.3832, 13.6858, 13.442 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:41:02Z", - "avg_ns": 13957167273, - "stddev_ns": 21616399, - "avg_ts": 36.68372, - "stddev_ts": 0.056831, - "samples_ns": [ - 13934193430, - 13977103615, - 13960204776 - ], - "samples_ts": [ - 36.7441, - 36.6313, - 36.6757 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T00:41:58Z", - "avg_ns": 37919115560, - "stddev_ns": 447976689, - "avg_ts": 13.503688, - "stddev_ts": 0.160461, - "samples_ns": [ - 38256793203, - 37410919300, - 38089634177 - ], - "samples_ts": [ - 13.3832, - 13.6858, - 13.442 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 263 - }, - { - "timestamp_utc": "2025-12-09T00:44:33.948747+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:43:52Z\",\n \"avg_ns\": 3426429465,\n \"stddev_ns\": 7777819,\n \"avg_ts\": 37.356800,\n \"stddev_ts\": 0.084833,\n \"samples_ns\": [ 3418191941, 3433646362, 3427450093 ],\n \"samples_ts\": [ 37.4467, 37.2782, 37.3455 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:44:06Z\",\n \"avg_ns\": 9113973866,\n \"stddev_ns\": 34155207,\n \"avg_ts\": 14.044499,\n \"stddev_ts\": 0.052581,\n \"samples_ns\": [ 9107847226, 9150777627, 9083296746 ],\n \"samples_ts\": [ 14.0538, 13.9879, 14.0918 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:43:52Z", - "avg_ns": 3426429465, - "stddev_ns": 7777819, - "avg_ts": 37.3568, - "stddev_ts": 0.084833, - "samples_ns": [ - 3418191941, - 3433646362, - 3427450093 - ], - "samples_ts": [ - 37.4467, - 37.2782, - 37.3455 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T00:44:06Z", - "avg_ns": 9113973866, - "stddev_ns": 34155207, - "avg_ts": 14.044499, - "stddev_ts": 0.052581, - "samples_ns": [ - 9107847226, - 9150777627, - 9083296746 - ], - "samples_ts": [ - 14.0538, - 13.9879, - 14.0918 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 264 - }, - { - "timestamp_utc": "2025-12-09T00:46:40.409480+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:44:34Z\",\n \"avg_ns\": 3428627713,\n \"stddev_ns\": 632683,\n \"avg_ts\": 37.332721,\n \"stddev_ts\": 0.006889,\n \"samples_ns\": [ 3429239601, 3428667433, 3427976105 ],\n \"samples_ts\": [ 37.3261, 37.3323, 37.3398 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:44:48Z\",\n \"avg_ns\": 37269632663,\n \"stddev_ns\": 773168103,\n \"avg_ts\": 13.741623,\n \"stddev_ts\": 0.281802,\n \"samples_ns\": [ 36887616819, 38159451549, 36761829623 ],\n \"samples_ts\": [ 13.88, 13.4174, 13.9275 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:44:34Z", - "avg_ns": 3428627713, - "stddev_ns": 632683, - "avg_ts": 37.332721, - "stddev_ts": 0.006889, - "samples_ns": [ - 3429239601, - 3428667433, - 3427976105 - ], - "samples_ts": [ - 37.3261, - 37.3323, - 37.3398 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T00:44:48Z", - "avg_ns": 37269632663, - "stddev_ns": 773168103, - "avg_ts": 13.741623, - "stddev_ts": 0.281802, - "samples_ns": [ - 36887616819, - 38159451549, - 36761829623 - ], - "samples_ts": [ - 13.88, - 13.4174, - 13.9275 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 265 - }, - { - "timestamp_utc": "2025-12-09T00:48:05.660087+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:46:41Z\",\n \"avg_ns\": 14148334569,\n \"stddev_ns\": 299226919,\n \"avg_ts\": 36.198931,\n \"stddev_ts\": 0.774929,\n \"samples_ns\": [ 13803299993, 14336666122, 14305037593 ],\n \"samples_ts\": [ 37.0926, 35.7126, 35.7916 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:47:37Z\",\n \"avg_ns\": 9356037042,\n \"stddev_ns\": 345922165,\n \"avg_ts\": 13.693240,\n \"stddev_ts\": 0.496334,\n \"samples_ns\": [ 9752904912, 9118436204, 9196770012 ],\n \"samples_ts\": [ 13.1243, 14.0375, 13.9179 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:46:41Z", - "avg_ns": 14148334569, - "stddev_ns": 299226919, - "avg_ts": 36.198931, - "stddev_ts": 0.774929, - "samples_ns": [ - 13803299993, - 14336666122, - 14305037593 - ], - "samples_ts": [ - 37.0926, - 35.7126, - 35.7916 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T00:47:37Z", - "avg_ns": 9356037042, - "stddev_ns": 345922165, - "avg_ts": 13.69324, - "stddev_ts": 0.496334, - "samples_ns": [ - 9752904912, - 9118436204, - 9196770012 - ], - "samples_ts": [ - 13.1243, - 14.0375, - 13.9179 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 266 - }, - { - "timestamp_utc": "2025-12-09T00:50:55.206314+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:48:06Z\",\n \"avg_ns\": 14172796470,\n \"stddev_ns\": 335137892,\n \"avg_ts\": 36.138852,\n \"stddev_ts\": 0.844150,\n \"samples_ns\": [ 14555724354, 14029727463, 13932937594 ],\n \"samples_ts\": [ 35.1752, 36.4939, 36.7475 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:49:03Z\",\n \"avg_ns\": 37202660192,\n \"stddev_ns\": 369124203,\n \"avg_ts\": 13.763365,\n \"stddev_ts\": 0.137334,\n \"samples_ns\": [ 37393480652, 37437315625, 36777184299 ],\n \"samples_ts\": [ 13.6922, 13.6762, 13.9217 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:48:06Z", - "avg_ns": 14172796470, - "stddev_ns": 335137892, - "avg_ts": 36.138852, - "stddev_ts": 0.84415, - "samples_ns": [ - 14555724354, - 14029727463, - 13932937594 - ], - "samples_ts": [ - 35.1752, - 36.4939, - 36.7475 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T00:49:03Z", - "avg_ns": 37202660192, - "stddev_ns": 369124203, - "avg_ts": 13.763365, - "stddev_ts": 0.137334, - "samples_ns": [ - 37393480652, - 37437315625, - 36777184299 - ], - "samples_ts": [ - 13.6922, - 13.6762, - 13.9217 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 267 - }, - { - "timestamp_utc": "2025-12-09T00:51:38.377033+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:50:55Z\",\n \"avg_ns\": 3604853641,\n \"stddev_ns\": 311477405,\n \"avg_ts\": 35.676817,\n \"stddev_ts\": 2.936184,\n \"samples_ns\": [ 3964516089, 3425627943, 3424416891 ],\n \"samples_ts\": [ 32.2864, 37.3654, 37.3786 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:51:10Z\",\n \"avg_ns\": 9337239487,\n \"stddev_ns\": 384042745,\n \"avg_ts\": 13.723667,\n \"stddev_ts\": 0.551383,\n \"samples_ns\": [ 9108896262, 9780627379, 9122194820 ],\n \"samples_ts\": [ 14.0522, 13.0871, 14.0317 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:50:55Z", - "avg_ns": 3604853641, - "stddev_ns": 311477405, - "avg_ts": 35.676817, - "stddev_ts": 2.936184, - "samples_ns": [ - 3964516089, - 3425627943, - 3424416891 - ], - "samples_ts": [ - 32.2864, - 37.3654, - 37.3786 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T00:51:10Z", - "avg_ns": 9337239487, - "stddev_ns": 384042745, - "avg_ts": 13.723667, - "stddev_ts": 0.551383, - "samples_ns": [ - 9108896262, - 9780627379, - 9122194820 - ], - "samples_ts": [ - 14.0522, - 13.0871, - 14.0317 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 268 - }, - { - "timestamp_utc": "2025-12-09T00:53:46.737919+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:51:39Z\",\n \"avg_ns\": 3462771066,\n \"stddev_ns\": 10081293,\n \"avg_ts\": 36.964824,\n \"stddev_ts\": 0.107622,\n \"samples_ns\": [ 3452627543, 3462897051, 3472788605 ],\n \"samples_ts\": [ 37.0732, 36.9633, 36.858 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:51:53Z\",\n \"avg_ns\": 37853294608,\n \"stddev_ns\": 330821695,\n \"avg_ts\": 13.526596,\n \"stddev_ts\": 0.118816,\n \"samples_ns\": [ 38043322711, 38045264849, 37471296264 ],\n \"samples_ts\": [ 13.4583, 13.4577, 13.6638 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:51:39Z", - "avg_ns": 3462771066, - "stddev_ns": 10081293, - "avg_ts": 36.964824, - "stddev_ts": 0.107622, - "samples_ns": [ - 3452627543, - 3462897051, - 3472788605 - ], - "samples_ts": [ - 37.0732, - 36.9633, - 36.858 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T00:51:53Z", - "avg_ns": 37853294608, - "stddev_ns": 330821695, - "avg_ts": 13.526596, - "stddev_ts": 0.118816, - "samples_ns": [ - 38043322711, - 38045264849, - 37471296264 - ], - "samples_ts": [ - 13.4583, - 13.4577, - 13.6638 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 269 - }, - { - "timestamp_utc": "2025-12-09T00:55:11.066041+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:53:47Z\",\n \"avg_ns\": 14042010903,\n \"stddev_ns\": 334861532,\n \"avg_ts\": 36.475655,\n \"stddev_ts\": 0.858036,\n \"samples_ns\": [ 14428639992, 13853245300, 13844147418 ],\n \"samples_ts\": [ 35.485, 36.9588, 36.9831 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:54:43Z\",\n \"avg_ns\": 9132129622,\n \"stddev_ns\": 15015540,\n \"avg_ts\": 14.016471,\n \"stddev_ts\": 0.023064,\n \"samples_ns\": [ 9115186221, 9143786947, 9137415699 ],\n \"samples_ts\": [ 14.0425, 13.9986, 14.0083 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:53:47Z", - "avg_ns": 14042010903, - "stddev_ns": 334861532, - "avg_ts": 36.475655, - "stddev_ts": 0.858036, - "samples_ns": [ - 14428639992, - 13853245300, - 13844147418 - ], - "samples_ts": [ - 35.485, - 36.9588, - 36.9831 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T00:54:43Z", - "avg_ns": 9132129622, - "stddev_ns": 15015540, - "avg_ts": 14.016471, - "stddev_ts": 0.023064, - "samples_ns": [ - 9115186221, - 9143786947, - 9137415699 - ], - "samples_ts": [ - 14.0425, - 13.9986, - 14.0083 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 270 - }, - { - "timestamp_utc": "2025-12-09T00:58:00.809803+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:55:11Z\",\n \"avg_ns\": 14246757159,\n \"stddev_ns\": 345131780,\n \"avg_ts\": 35.952267,\n \"stddev_ts\": 0.883237,\n \"samples_ns\": [ 14458392513, 13848494951, 14433384013 ],\n \"samples_ts\": [ 35.412, 36.9715, 35.4733 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:56:08Z\",\n \"avg_ns\": 37413488373,\n \"stddev_ns\": 635468246,\n \"avg_ts\": 13.687543,\n \"stddev_ts\": 0.233058,\n \"samples_ns\": [ 36750715717, 38017581865, 37472167537 ],\n \"samples_ts\": [ 13.9317, 13.4675, 13.6635 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:55:11Z", - "avg_ns": 14246757159, - "stddev_ns": 345131780, - "avg_ts": 35.952267, - "stddev_ts": 0.883237, - "samples_ns": [ - 14458392513, - 13848494951, - 14433384013 - ], - "samples_ts": [ - 35.412, - 36.9715, - 35.4733 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T00:56:08Z", - "avg_ns": 37413488373, - "stddev_ns": 635468246, - "avg_ts": 13.687543, - "stddev_ts": 0.233058, - "samples_ns": [ - 36750715717, - 38017581865, - 37472167537 - ], - "samples_ts": [ - 13.9317, - 13.4675, - 13.6635 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 271 - }, - { - "timestamp_utc": "2025-12-09T00:58:43.581776+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:58:01Z\",\n \"avg_ns\": 3434460273,\n \"stddev_ns\": 9571829,\n \"avg_ts\": 37.269513,\n \"stddev_ts\": 0.103883,\n \"samples_ns\": [ 3434829347, 3443841869, 3424709605 ],\n \"samples_ts\": [ 37.2653, 37.1678, 37.3754 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:58:15Z\",\n \"avg_ns\": 9357924478,\n \"stddev_ns\": 347141792,\n \"avg_ts\": 13.690545,\n \"stddev_ts\": 0.497285,\n \"samples_ns\": [ 9144734781, 9170546489, 9758492165 ],\n \"samples_ts\": [ 13.9971, 13.9577, 13.1168 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:58:01Z", - "avg_ns": 3434460273, - "stddev_ns": 9571829, - "avg_ts": 37.269513, - "stddev_ts": 0.103883, - "samples_ns": [ - 3434829347, - 3443841869, - 3424709605 - ], - "samples_ts": [ - 37.2653, - 37.1678, - 37.3754 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T00:58:15Z", - "avg_ns": 9357924478, - "stddev_ns": 347141792, - "avg_ts": 13.690545, - "stddev_ts": 0.497285, - "samples_ns": [ - 9144734781, - 9170546489, - 9758492165 - ], - "samples_ts": [ - 13.9971, - 13.9577, - 13.1168 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 272 - }, - { - "timestamp_utc": "2025-12-09T01:00:51.979978+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:58:44Z\",\n \"avg_ns\": 3620762822,\n \"stddev_ns\": 333456925,\n \"avg_ts\": 35.542535,\n \"stddev_ts\": 3.108787,\n \"samples_ns\": [ 3418431963, 4005636168, 3438220337 ],\n \"samples_ts\": [ 37.4441, 31.955, 37.2286 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:58:58Z\",\n \"avg_ns\": 37724853277,\n \"stddev_ns\": 367852647,\n \"avg_ts\": 13.572810,\n \"stddev_ts\": 0.131644,\n \"samples_ns\": [ 37475035545, 37552257825, 38147266463 ],\n \"samples_ts\": [ 13.6624, 13.6343, 13.4217 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T00:58:44Z", - "avg_ns": 3620762822, - "stddev_ns": 333456925, - "avg_ts": 35.542535, - "stddev_ts": 3.108787, - "samples_ns": [ - 3418431963, - 4005636168, - 3438220337 - ], - "samples_ts": [ - 37.4441, - 31.955, - 37.2286 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T00:58:58Z", - "avg_ns": 37724853277, - "stddev_ns": 367852647, - "avg_ts": 13.57281, - "stddev_ts": 0.131644, - "samples_ns": [ - 37475035545, - 37552257825, - 38147266463 - ], - "samples_ts": [ - 13.6624, - 13.6343, - 13.4217 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 273 - }, - { - "timestamp_utc": "2025-12-09T01:02:16.879209+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:00:52Z\",\n \"avg_ns\": 14192808569,\n \"stddev_ns\": 310783506,\n \"avg_ts\": 36.086000,\n \"stddev_ts\": 0.780408,\n \"samples_ns\": [ 14551317627, 13999777202, 14027330880 ],\n \"samples_ts\": [ 35.1858, 36.572, 36.5002 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:01:49Z\",\n \"avg_ns\": 9122748767,\n \"stddev_ns\": 29190988,\n \"avg_ts\": 14.030955,\n \"stddev_ts\": 0.044947,\n \"samples_ns\": [ 9090623291, 9147647746, 9129975264 ],\n \"samples_ts\": [ 14.0804, 13.9927, 14.0198 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:00:52Z", - "avg_ns": 14192808569, - "stddev_ns": 310783506, - "avg_ts": 36.086, - "stddev_ts": 0.780408, - "samples_ns": [ - 14551317627, - 13999777202, - 14027330880 - ], - "samples_ts": [ - 35.1858, - 36.572, - 36.5002 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T01:01:49Z", - "avg_ns": 9122748767, - "stddev_ns": 29190988, - "avg_ts": 14.030955, - "stddev_ts": 0.044947, - "samples_ns": [ - 9090623291, - 9147647746, - 9129975264 - ], - "samples_ts": [ - 14.0804, - 13.9927, - 14.0198 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 274 - }, - { - "timestamp_utc": "2025-12-09T01:05:04.376278+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:02:17Z\",\n \"avg_ns\": 13965429996,\n \"stddev_ns\": 16407068,\n \"avg_ts\": 36.661991,\n \"stddev_ts\": 0.043100,\n \"samples_ns\": [ 13946504605, 13974151002, 13975634382 ],\n \"samples_ts\": [ 36.7117, 36.6391, 36.6352 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:03:13Z\",\n \"avg_ns\": 36881177735,\n \"stddev_ns\": 34664877,\n \"avg_ts\": 13.882428,\n \"stddev_ts\": 0.013042,\n \"samples_ns\": [ 36920684695, 36866993545, 36855854966 ],\n \"samples_ts\": [ 13.8676, 13.8878, 13.892 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:02:17Z", - "avg_ns": 13965429996, - "stddev_ns": 16407068, - "avg_ts": 36.661991, - "stddev_ts": 0.0431, - "samples_ns": [ - 13946504605, - 13974151002, - 13975634382 - ], - "samples_ts": [ - 36.7117, - 36.6391, - 36.6352 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T01:03:13Z", - "avg_ns": 36881177735, - "stddev_ns": 34664877, - "avg_ts": 13.882428, - "stddev_ts": 0.013042, - "samples_ns": [ - 36920684695, - 36866993545, - 36855854966 - ], - "samples_ts": [ - 13.8676, - 13.8878, - 13.892 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 275 - }, - { - "timestamp_utc": "2025-12-09T01:05:46.614492+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:05:05Z\",\n \"avg_ns\": 3434047166,\n \"stddev_ns\": 5665761,\n \"avg_ts\": 37.273871,\n \"stddev_ts\": 0.061519,\n \"samples_ns\": [ 3434995002, 3439178926, 3427967571 ],\n \"samples_ts\": [ 37.2635, 37.2182, 37.3399 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:05:18Z\",\n \"avg_ns\": 9186503243,\n \"stddev_ns\": 25311275,\n \"avg_ts\": 13.933555,\n \"stddev_ts\": 0.038340,\n \"samples_ns\": [ 9215192427, 9167325567, 9176991735 ],\n \"samples_ts\": [ 13.8901, 13.9626, 13.9479 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:05:05Z", - "avg_ns": 3434047166, - "stddev_ns": 5665761, - "avg_ts": 37.273871, - "stddev_ts": 0.061519, - "samples_ns": [ - 3434995002, - 3439178926, - 3427967571 - ], - "samples_ts": [ - 37.2635, - 37.2182, - 37.3399 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T01:05:18Z", - "avg_ns": 9186503243, - "stddev_ns": 25311275, - "avg_ts": 13.933555, - "stddev_ts": 0.03834, - "samples_ns": [ - 9215192427, - 9167325567, - 9176991735 - ], - "samples_ts": [ - 13.8901, - 13.9626, - 13.9479 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 276 - }, - { - "timestamp_utc": "2025-12-09T01:07:51.886800+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:05:47Z\",\n \"avg_ns\": 3443693945,\n \"stddev_ns\": 14472987,\n \"avg_ts\": 37.169826,\n \"stddev_ts\": 0.155850,\n \"samples_ns\": [ 3460339373, 3434082346, 3436660117 ],\n \"samples_ts\": [ 36.9906, 37.2734, 37.2455 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:06:01Z\",\n \"avg_ns\": 36847979393,\n \"stddev_ns\": 65578635,\n \"avg_ts\": 13.894957,\n \"stddev_ts\": 0.024748,\n \"samples_ns\": [ 36900081751, 36774341149, 36869515280 ],\n \"samples_ts\": [ 13.8753, 13.9228, 13.8868 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:05:47Z", - "avg_ns": 3443693945, - "stddev_ns": 14472987, - "avg_ts": 37.169826, - "stddev_ts": 0.15585, - "samples_ns": [ - 3460339373, - 3434082346, - 3436660117 - ], - "samples_ts": [ - 36.9906, - 37.2734, - 37.2455 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T01:06:01Z", - "avg_ns": 36847979393, - "stddev_ns": 65578635, - "avg_ts": 13.894957, - "stddev_ts": 0.024748, - "samples_ns": [ - 36900081751, - 36774341149, - 36869515280 - ], - "samples_ts": [ - 13.8753, - 13.9228, - 13.8868 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 277 - }, - { - "timestamp_utc": "2025-12-09T01:09:15.470728+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:07:52Z\",\n \"avg_ns\": 13794633052,\n \"stddev_ns\": 8754928,\n \"avg_ts\": 37.115894,\n \"stddev_ts\": 0.023558,\n \"samples_ns\": [ 13784824147, 13797425583, 13801649428 ],\n \"samples_ts\": [ 37.1423, 37.1084, 37.097 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:08:47Z\",\n \"avg_ns\": 9141718047,\n \"stddev_ns\": 35561439,\n \"avg_ts\": 14.001886,\n \"stddev_ts\": 0.054577,\n \"samples_ns\": [ 9156534345, 9167475616, 9101144181 ],\n \"samples_ts\": [ 13.9791, 13.9624, 14.0642 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:07:52Z", - "avg_ns": 13794633052, - "stddev_ns": 8754928, - "avg_ts": 37.115894, - "stddev_ts": 0.023558, - "samples_ns": [ - 13784824147, - 13797425583, - 13801649428 - ], - "samples_ts": [ - 37.1423, - 37.1084, - 37.097 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T01:08:47Z", - "avg_ns": 9141718047, - "stddev_ns": 35561439, - "avg_ts": 14.001886, - "stddev_ts": 0.054577, - "samples_ns": [ - 9156534345, - 9167475616, - 9101144181 - ], - "samples_ts": [ - 13.9791, - 13.9624, - 14.0642 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 278 - }, - { - "timestamp_utc": "2025-12-09T01:12:02.038010+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:09:16Z\",\n \"avg_ns\": 13783841080,\n \"stddev_ns\": 23193088,\n \"avg_ts\": 37.145014,\n \"stddev_ts\": 0.062454,\n \"samples_ns\": [ 13809890125, 13765432226, 13776200890 ],\n \"samples_ts\": [ 37.0749, 37.1946, 37.1655 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:10:11Z\",\n \"avg_ns\": 36832779855,\n \"stddev_ns\": 4603641,\n \"avg_ts\": 13.900662,\n \"stddev_ts\": 0.001736,\n \"samples_ns\": [ 36833204993, 36837152167, 36827982406 ],\n \"samples_ts\": [ 13.9005, 13.899, 13.9025 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:09:16Z", - "avg_ns": 13783841080, - "stddev_ns": 23193088, - "avg_ts": 37.145014, - "stddev_ts": 0.062454, - "samples_ns": [ - 13809890125, - 13765432226, - 13776200890 - ], - "samples_ts": [ - 37.0749, - 37.1946, - 37.1655 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T01:10:11Z", - "avg_ns": 36832779855, - "stddev_ns": 4603641, - "avg_ts": 13.900662, - "stddev_ts": 0.001736, - "samples_ns": [ - 36833204993, - 36837152167, - 36827982406 - ], - "samples_ts": [ - 13.9005, - 13.899, - 13.9025 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 279 - }, - { - "timestamp_utc": "2025-12-09T01:12:44.330736+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:12:02Z\",\n \"avg_ns\": 3430598501,\n \"stddev_ns\": 3737415,\n \"avg_ts\": 37.311303,\n \"stddev_ts\": 0.040618,\n \"samples_ns\": [ 3428515012, 3434912729, 3428367763 ],\n \"samples_ts\": [ 37.3339, 37.2644, 37.3356 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:12:16Z\",\n \"avg_ns\": 9214361392,\n \"stddev_ns\": 16349437,\n \"avg_ts\": 13.891388,\n \"stddev_ts\": 0.024625,\n \"samples_ns\": [ 9232998088, 9202435349, 9207650740 ],\n \"samples_ts\": [ 13.8633, 13.9094, 13.9015 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:12:02Z", - "avg_ns": 3430598501, - "stddev_ns": 3737415, - "avg_ts": 37.311303, - "stddev_ts": 0.040618, - "samples_ns": [ - 3428515012, - 3434912729, - 3428367763 - ], - "samples_ts": [ - 37.3339, - 37.2644, - 37.3356 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T01:12:16Z", - "avg_ns": 9214361392, - "stddev_ns": 16349437, - "avg_ts": 13.891388, - "stddev_ts": 0.024625, - "samples_ns": [ - 9232998088, - 9202435349, - 9207650740 - ], - "samples_ts": [ - 13.8633, - 13.9094, - 13.9015 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 280 - }, - { - "timestamp_utc": "2025-12-09T01:14:49.780791+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:12:45Z\",\n \"avg_ns\": 3431007219,\n \"stddev_ns\": 17041035,\n \"avg_ts\": 37.307441,\n \"stddev_ts\": 0.184932,\n \"samples_ns\": [ 3449977935, 3416996286, 3426047436 ],\n \"samples_ts\": [ 37.1017, 37.4598, 37.3608 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:12:58Z\",\n \"avg_ns\": 36936751949,\n \"stddev_ns\": 45648517,\n \"avg_ts\": 13.861547,\n \"stddev_ts\": 0.017142,\n \"samples_ns\": [ 36970129530, 36884732719, 36955393598 ],\n \"samples_ts\": [ 13.849, 13.8811, 13.8545 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:12:45Z", - "avg_ns": 3431007219, - "stddev_ns": 17041035, - "avg_ts": 37.307441, - "stddev_ts": 0.184932, - "samples_ns": [ - 3449977935, - 3416996286, - 3426047436 - ], - "samples_ts": [ - 37.1017, - 37.4598, - 37.3608 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T01:12:58Z", - "avg_ns": 36936751949, - "stddev_ns": 45648517, - "avg_ts": 13.861547, - "stddev_ts": 0.017142, - "samples_ns": [ - 36970129530, - 36884732719, - 36955393598 - ], - "samples_ts": [ - 13.849, - 13.8811, - 13.8545 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 281 - }, - { - "timestamp_utc": "2025-12-09T01:16:15.136571+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:14:50Z\",\n \"avg_ns\": 13880167411,\n \"stddev_ns\": 34555530,\n \"avg_ts\": 36.887316,\n \"stddev_ts\": 0.091957,\n \"samples_ns\": [ 13840557540, 13904141069, 13895803624 ],\n \"samples_ts\": [ 36.9927, 36.8236, 36.8457 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:15:46Z\",\n \"avg_ns\": 9633197290,\n \"stddev_ns\": 24432084,\n \"avg_ts\": 13.287442,\n \"stddev_ts\": 0.033667,\n \"samples_ns\": [ 9612972328, 9660342983, 9626276560 ],\n \"samples_ts\": [ 13.3153, 13.25, 13.2969 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:14:50Z", - "avg_ns": 13880167411, - "stddev_ns": 34555530, - "avg_ts": 36.887316, - "stddev_ts": 0.091957, - "samples_ns": [ - 13840557540, - 13904141069, - 13895803624 - ], - "samples_ts": [ - 36.9927, - 36.8236, - 36.8457 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T01:15:46Z", - "avg_ns": 9633197290, - "stddev_ns": 24432084, - "avg_ts": 13.287442, - "stddev_ts": 0.033667, - "samples_ns": [ - 9612972328, - 9660342983, - 9626276560 - ], - "samples_ts": [ - 13.3153, - 13.25, - 13.2969 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 282 - }, - { - "timestamp_utc": "2025-12-09T01:19:02.695543+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:16:15Z\",\n \"avg_ns\": 13853454505,\n \"stddev_ns\": 5514314,\n \"avg_ts\": 36.958295,\n \"stddev_ts\": 0.014714,\n \"samples_ns\": [ 13856402152, 13856868544, 13847092819 ],\n \"samples_ts\": [ 36.9504, 36.9492, 36.9753 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:17:11Z\",\n \"avg_ns\": 37063611721,\n \"stddev_ns\": 110634966,\n \"avg_ts\": 13.814170,\n \"stddev_ts\": 0.041201,\n \"samples_ns\": [ 37042404956, 37183315061, 36965115146 ],\n \"samples_ts\": [ 13.822, 13.7696, 13.8509 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:16:15Z", - "avg_ns": 13853454505, - "stddev_ns": 5514314, - "avg_ts": 36.958295, - "stddev_ts": 0.014714, - "samples_ns": [ - 13856402152, - 13856868544, - 13847092819 - ], - "samples_ts": [ - 36.9504, - 36.9492, - 36.9753 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T01:17:11Z", - "avg_ns": 37063611721, - "stddev_ns": 110634966, - "avg_ts": 13.81417, - "stddev_ts": 0.041201, - "samples_ns": [ - 37042404956, - 37183315061, - 36965115146 - ], - "samples_ts": [ - 13.822, - 13.7696, - 13.8509 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 283 - }, - { - "timestamp_utc": "2025-12-09T01:19:44.927094+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:19:03Z\",\n \"avg_ns\": 3436291611,\n \"stddev_ns\": 6712702,\n \"avg_ts\": 37.249553,\n \"stddev_ts\": 0.072686,\n \"samples_ns\": [ 3443957134, 3433450481, 3431467220 ],\n \"samples_ts\": [ 37.1665, 37.2803, 37.3018 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:19:17Z\",\n \"avg_ns\": 9182592447,\n \"stddev_ns\": 23952333,\n \"avg_ts\": 13.939482,\n \"stddev_ts\": 0.036415,\n \"samples_ns\": [ 9154942351, 9196973583, 9195861408 ],\n \"samples_ts\": [ 13.9815, 13.9176, 13.9193 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:19:03Z", - "avg_ns": 3436291611, - "stddev_ns": 6712702, - "avg_ts": 37.249553, - "stddev_ts": 0.072686, - "samples_ns": [ - 3443957134, - 3433450481, - 3431467220 - ], - "samples_ts": [ - 37.1665, - 37.2803, - 37.3018 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T01:19:17Z", - "avg_ns": 9182592447, - "stddev_ns": 23952333, - "avg_ts": 13.939482, - "stddev_ts": 0.036415, - "samples_ns": [ - 9154942351, - 9196973583, - 9195861408 - ], - "samples_ts": [ - 13.9815, - 13.9176, - 13.9193 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 284 - }, - { - "timestamp_utc": "2025-12-09T01:21:51.104749+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:19:45Z\",\n \"avg_ns\": 3424464521,\n \"stddev_ns\": 9379916,\n \"avg_ts\": 37.378293,\n \"stddev_ts\": 0.102328,\n \"samples_ns\": [ 3434404939, 3423219081, 3415769543 ],\n \"samples_ts\": [ 37.2699, 37.3917, 37.4733 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:19:59Z\",\n \"avg_ns\": 37182524409,\n \"stddev_ns\": 18320823,\n \"avg_ts\": 13.769912,\n \"stddev_ts\": 0.006784,\n \"samples_ns\": [ 37183929098, 37200100423, 37163543708 ],\n \"samples_ts\": [ 13.7694, 13.7634, 13.7769 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:19:45Z", - "avg_ns": 3424464521, - "stddev_ns": 9379916, - "avg_ts": 37.378293, - "stddev_ts": 0.102328, - "samples_ns": [ - 3434404939, - 3423219081, - 3415769543 - ], - "samples_ts": [ - 37.2699, - 37.3917, - 37.4733 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T01:19:59Z", - "avg_ns": 37182524409, - "stddev_ns": 18320823, - "avg_ts": 13.769912, - "stddev_ts": 0.006784, - "samples_ns": [ - 37183929098, - 37200100423, - 37163543708 - ], - "samples_ts": [ - 13.7694, - 13.7634, - 13.7769 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 285 - }, - { - "timestamp_utc": "2025-12-09T01:23:15.754808+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:21:51Z\",\n \"avg_ns\": 14049957085,\n \"stddev_ns\": 48840951,\n \"avg_ts\": 36.441686,\n \"stddev_ts\": 0.126500,\n \"samples_ns\": [ 14035294213, 14104449677, 14010127366 ],\n \"samples_ts\": [ 36.4795, 36.3006, 36.545 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:22:48Z\",\n \"avg_ns\": 9177685697,\n \"stddev_ns\": 41877588,\n \"avg_ts\": 13.947065,\n \"stddev_ts\": 0.063807,\n \"samples_ns\": [ 9200418222, 9203280991, 9129357878 ],\n \"samples_ts\": [ 13.9124, 13.9081, 14.0207 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:21:51Z", - "avg_ns": 14049957085, - "stddev_ns": 48840951, - "avg_ts": 36.441686, - "stddev_ts": 0.1265, - "samples_ns": [ - 14035294213, - 14104449677, - 14010127366 - ], - "samples_ts": [ - 36.4795, - 36.3006, - 36.545 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T01:22:48Z", - "avg_ns": 9177685697, - "stddev_ns": 41877588, - "avg_ts": 13.947065, - "stddev_ts": 0.063807, - "samples_ns": [ - 9200418222, - 9203280991, - 9129357878 - ], - "samples_ts": [ - 13.9124, - 13.9081, - 14.0207 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 286 - }, - { - "timestamp_utc": "2025-12-09T01:26:03.150132+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:23:16Z\",\n \"avg_ns\": 14015528361,\n \"stddev_ns\": 5231950,\n \"avg_ts\": 36.530913,\n \"stddev_ts\": 0.013629,\n \"samples_ns\": [ 14010731214, 14021102751, 14014751120 ],\n \"samples_ts\": [ 36.5434, 36.5164, 36.5329 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:24:12Z\",\n \"avg_ns\": 36790616286,\n \"stddev_ns\": 44945922,\n \"avg_ts\": 13.916606,\n \"stddev_ts\": 0.016997,\n \"samples_ns\": [ 36749177112, 36838394981, 36784276766 ],\n \"samples_ts\": [ 13.9323, 13.8985, 13.919 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:23:16Z", - "avg_ns": 14015528361, - "stddev_ns": 5231950, - "avg_ts": 36.530913, - "stddev_ts": 0.013629, - "samples_ns": [ - 14010731214, - 14021102751, - 14014751120 - ], - "samples_ts": [ - 36.5434, - 36.5164, - 36.5329 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_type": "gemma3 1B Q4_K - Medium", - "model_size": 799525120, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T01:24:12Z", - "avg_ns": 36790616286, - "stddev_ns": 44945922, - "avg_ts": 13.916606, - "stddev_ts": 0.016997, - "samples_ns": [ - 36749177112, - 36838394981, - 36784276766 - ], - "samples_ts": [ - 13.9323, - 13.8985, - 13.919 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 287 - }, - { - "timestamp_utc": "2025-12-09T01:27:24.835294+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:26:11Z\",\n \"avg_ns\": 5502647593,\n \"stddev_ns\": 30616759,\n \"avg_ts\": 23.262011,\n \"stddev_ts\": 0.129724,\n \"samples_ns\": [ 5527656554, 5468503130, 5511783097 ],\n \"samples_ts\": [ 23.1563, 23.4068, 23.223 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:26:33Z\",\n \"avg_ns\": 17171905733,\n \"stddev_ns\": 46536519,\n \"avg_ts\": 7.454072,\n \"stddev_ts\": 0.020228,\n \"samples_ns\": [ 17119051899, 17189936115, 17206729185 ],\n \"samples_ts\": [ 7.47705, 7.44622, 7.43895 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:26:11Z", - "avg_ns": 5502647593, - "stddev_ns": 30616759, - "avg_ts": 23.262011, - "stddev_ts": 0.129724, - "samples_ns": [ - 5527656554, - 5468503130, - 5511783097 - ], - "samples_ts": [ - 23.1563, - 23.4068, - 23.223 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T01:26:33Z", - "avg_ns": 17171905733, - "stddev_ns": 46536519, - "avg_ts": 7.454072, - "stddev_ts": 0.020228, - "samples_ns": [ - 17119051899, - 17189936115, - 17206729185 - ], - "samples_ts": [ - 7.47705, - 7.44622, - 7.43895 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 288 - }, - { - "timestamp_utc": "2025-12-09T01:31:15.719886+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:27:25Z\",\n \"avg_ns\": 5414467611,\n \"stddev_ns\": 11745444,\n \"avg_ts\": 23.640441,\n \"stddev_ts\": 0.051321,\n \"samples_ns\": [ 5401530816, 5424461566, 5417410452 ],\n \"samples_ts\": [ 23.697, 23.5968, 23.6275 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:27:47Z\",\n \"avg_ns\": 69427419872,\n \"stddev_ns\": 131252701,\n \"avg_ts\": 7.374625,\n \"stddev_ts\": 0.013929,\n \"samples_ns\": [ 69327198760, 69575988182, 69379072676 ],\n \"samples_ts\": [ 7.38527, 7.35886, 7.37975 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:27:25Z", - "avg_ns": 5414467611, - "stddev_ns": 11745444, - "avg_ts": 23.640441, - "stddev_ts": 0.051321, - "samples_ns": [ - 5401530816, - 5424461566, - 5417410452 - ], - "samples_ts": [ - 23.697, - 23.5968, - 23.6275 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T01:27:47Z", - "avg_ns": 69427419872, - "stddev_ns": 131252701, - "avg_ts": 7.374625, - "stddev_ts": 0.013929, - "samples_ns": [ - 69327198760, - 69575988182, - 69379072676 - ], - "samples_ts": [ - 7.38527, - 7.35886, - 7.37975 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 289 - }, - { - "timestamp_utc": "2025-12-09T01:33:36.920013+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:31:16Z\",\n \"avg_ns\": 21876366473,\n \"stddev_ns\": 25868848,\n \"avg_ts\": 23.404274,\n \"stddev_ts\": 0.027688,\n \"samples_ns\": [ 21897739231, 21883751602, 21847608587 ],\n \"samples_ts\": [ 23.3814, 23.3964, 23.4351 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:32:43Z\",\n \"avg_ns\": 17602951421,\n \"stddev_ns\": 44748078,\n \"avg_ts\": 7.271539,\n \"stddev_ts\": 0.018489,\n \"samples_ns\": [ 17556990511, 17646378181, 17605485573 ],\n \"samples_ts\": [ 7.29054, 7.25361, 7.27046 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:31:16Z", - "avg_ns": 21876366473, - "stddev_ns": 25868848, - "avg_ts": 23.404274, - "stddev_ts": 0.027688, - "samples_ns": [ - 21897739231, - 21883751602, - 21847608587 - ], - "samples_ts": [ - 23.3814, - 23.3964, - 23.4351 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T01:32:43Z", - "avg_ns": 17602951421, - "stddev_ns": 44748078, - "avg_ts": 7.271539, - "stddev_ts": 0.018489, - "samples_ns": [ - 17556990511, - 17646378181, - 17605485573 - ], - "samples_ts": [ - 7.29054, - 7.25361, - 7.27046 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 290 - }, - { - "timestamp_utc": "2025-12-09T01:38:39.002755+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:33:37Z\",\n \"avg_ns\": 21804867324,\n \"stddev_ns\": 20916046,\n \"avg_ts\": 23.481010,\n \"stddev_ts\": 0.022532,\n \"samples_ns\": [ 21822007448, 21811032300, 21781562225 ],\n \"samples_ts\": [ 23.4626, 23.4744, 23.5061 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:35:04Z\",\n \"avg_ns\": 71335514028,\n \"stddev_ns\": 35352152,\n \"avg_ts\": 7.177352,\n \"stddev_ts\": 0.003556,\n \"samples_ns\": [ 71373769121, 71304055912, 71328717053 ],\n \"samples_ts\": [ 7.1735, 7.18052, 7.17803 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:33:37Z", - "avg_ns": 21804867324, - "stddev_ns": 20916046, - "avg_ts": 23.48101, - "stddev_ts": 0.022532, - "samples_ns": [ - 21822007448, - 21811032300, - 21781562225 - ], - "samples_ts": [ - 23.4626, - 23.4744, - 23.5061 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T01:35:04Z", - "avg_ns": 71335514028, - "stddev_ns": 35352152, - "avg_ts": 7.177352, - "stddev_ts": 0.003556, - "samples_ns": [ - 71373769121, - 71304055912, - 71328717053 - ], - "samples_ts": [ - 7.1735, - 7.18052, - 7.17803 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 291 - }, - { - "timestamp_utc": "2025-12-09T01:39:53.334969+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:38:39Z\",\n \"avg_ns\": 5419347489,\n \"stddev_ns\": 4115049,\n \"avg_ts\": 23.619089,\n \"stddev_ts\": 0.017942,\n \"samples_ns\": [ 5421414184, 5422019572, 5414608711 ],\n \"samples_ts\": [ 23.6101, 23.6074, 23.6398 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:39:01Z\",\n \"avg_ns\": 17236380555,\n \"stddev_ns\": 38430619,\n \"avg_ts\": 7.426178,\n \"stddev_ts\": 0.016576,\n \"samples_ns\": [ 17251013775, 17265344989, 17192782901 ],\n \"samples_ts\": [ 7.41985, 7.41369, 7.44498 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:38:39Z", - "avg_ns": 5419347489, - "stddev_ns": 4115049, - "avg_ts": 23.619089, - "stddev_ts": 0.017942, - "samples_ns": [ - 5421414184, - 5422019572, - 5414608711 - ], - "samples_ts": [ - 23.6101, - 23.6074, - 23.6398 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T01:39:01Z", - "avg_ns": 17236380555, - "stddev_ns": 38430619, - "avg_ts": 7.426178, - "stddev_ts": 0.016576, - "samples_ns": [ - 17251013775, - 17265344989, - 17192782901 - ], - "samples_ts": [ - 7.41985, - 7.41369, - 7.44498 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 292 - }, - { - "timestamp_utc": "2025-12-09T01:43:43.786136+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:39:54Z\",\n \"avg_ns\": 5451094383,\n \"stddev_ns\": 1765493,\n \"avg_ts\": 23.481525,\n \"stddev_ts\": 0.007598,\n \"samples_ns\": [ 5451021539, 5452893626, 5449367985 ],\n \"samples_ts\": [ 23.4818, 23.4738, 23.489 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:40:15Z\",\n \"avg_ns\": 69229246374,\n \"stddev_ns\": 191362664,\n \"avg_ts\": 7.395756,\n \"stddev_ts\": 0.020417,\n \"samples_ns\": [ 69444932711, 69162987124, 69079819287 ],\n \"samples_ts\": [ 7.37275, 7.4028, 7.41172 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:39:54Z", - "avg_ns": 5451094383, - "stddev_ns": 1765493, - "avg_ts": 23.481525, - "stddev_ts": 0.007598, - "samples_ns": [ - 5451021539, - 5452893626, - 5449367985 - ], - "samples_ts": [ - 23.4818, - 23.4738, - 23.489 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T01:40:15Z", - "avg_ns": 69229246374, - "stddev_ns": 191362664, - "avg_ts": 7.395756, - "stddev_ts": 0.020417, - "samples_ns": [ - 69444932711, - 69162987124, - 69079819287 - ], - "samples_ts": [ - 7.37275, - 7.4028, - 7.41172 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 293 - }, - { - "timestamp_utc": "2025-12-09T01:46:04.003212+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:43:44Z\",\n \"avg_ns\": 21936897157,\n \"stddev_ns\": 17428604,\n \"avg_ts\": 23.339683,\n \"stddev_ts\": 0.018551,\n \"samples_ns\": [ 21949525805, 21944152807, 21917012859 ],\n \"samples_ts\": [ 23.3262, 23.332, 23.3608 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:45:12Z\",\n \"avg_ns\": 17160376802,\n \"stddev_ns\": 34987511,\n \"avg_ts\": 7.459064,\n \"stddev_ts\": 0.015190,\n \"samples_ns\": [ 17200703960, 17138111741, 17142314705 ],\n \"samples_ts\": [ 7.44156, 7.46873, 7.4669 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:43:44Z", - "avg_ns": 21936897157, - "stddev_ns": 17428604, - "avg_ts": 23.339683, - "stddev_ts": 0.018551, - "samples_ns": [ - 21949525805, - 21944152807, - 21917012859 - ], - "samples_ts": [ - 23.3262, - 23.332, - 23.3608 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T01:45:12Z", - "avg_ns": 17160376802, - "stddev_ns": 34987511, - "avg_ts": 7.459064, - "stddev_ts": 0.01519, - "samples_ns": [ - 17200703960, - 17138111741, - 17142314705 - ], - "samples_ts": [ - 7.44156, - 7.46873, - 7.4669 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 294 - }, - { - "timestamp_utc": "2025-12-09T01:51:07.066221+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:46:04Z\",\n \"avg_ns\": 21977696753,\n \"stddev_ns\": 83378060,\n \"avg_ts\": 23.296568,\n \"stddev_ts\": 0.088347,\n \"samples_ns\": [ 21897374334, 21971889379, 22063826547 ],\n \"samples_ts\": [ 23.3818, 23.3025, 23.2054 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:47:32Z\",\n \"avg_ns\": 71418701763,\n \"stddev_ns\": 45983535,\n \"avg_ts\": 7.168993,\n \"stddev_ts\": 0.004615,\n \"samples_ns\": [ 71467558516, 71412280297, 71376266476 ],\n \"samples_ts\": [ 7.16409, 7.16964, 7.17325 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:46:04Z", - "avg_ns": 21977696753, - "stddev_ns": 83378060, - "avg_ts": 23.296568, - "stddev_ts": 0.088347, - "samples_ns": [ - 21897374334, - 21971889379, - 22063826547 - ], - "samples_ts": [ - 23.3818, - 23.3025, - 23.2054 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T01:47:32Z", - "avg_ns": 71418701763, - "stddev_ns": 45983535, - "avg_ts": 7.168993, - "stddev_ts": 0.004615, - "samples_ns": [ - 71467558516, - 71412280297, - 71376266476 - ], - "samples_ts": [ - 7.16409, - 7.16964, - 7.17325 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 295 - }, - { - "timestamp_utc": "2025-12-09T01:52:21.174242+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:51:07Z\",\n \"avg_ns\": 5441576896,\n \"stddev_ns\": 16956613,\n \"avg_ts\": 23.522745,\n \"stddev_ts\": 0.073212,\n \"samples_ns\": [ 5427400682, 5460360818, 5436969190 ],\n \"samples_ts\": [ 23.584, 23.4417, 23.5425 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:51:29Z\",\n \"avg_ns\": 17154976742,\n \"stddev_ns\": 49931533,\n \"avg_ts\": 7.461434,\n \"stddev_ts\": 0.021705,\n \"samples_ns\": [ 17108628991, 17207850034, 17148451202 ],\n \"samples_ts\": [ 7.4816, 7.43847, 7.46423 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:51:07Z", - "avg_ns": 5441576896, - "stddev_ns": 16956613, - "avg_ts": 23.522745, - "stddev_ts": 0.073212, - "samples_ns": [ - 5427400682, - 5460360818, - 5436969190 - ], - "samples_ts": [ - 23.584, - 23.4417, - 23.5425 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T01:51:29Z", - "avg_ns": 17154976742, - "stddev_ns": 49931533, - "avg_ts": 7.461434, - "stddev_ts": 0.021705, - "samples_ns": [ - 17108628991, - 17207850034, - 17148451202 - ], - "samples_ts": [ - 7.4816, - 7.43847, - 7.46423 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 296 - }, - { - "timestamp_utc": "2025-12-09T01:56:13.450038+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:52:21Z\",\n \"avg_ns\": 5441612002,\n \"stddev_ns\": 21249385,\n \"avg_ts\": 23.522680,\n \"stddev_ts\": 0.091677,\n \"samples_ns\": [ 5425759684, 5465757086, 5433319238 ],\n \"samples_ts\": [ 23.5912, 23.4185, 23.5583 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:52:43Z\",\n \"avg_ns\": 69867928927,\n \"stddev_ns\": 72515347,\n \"avg_ts\": 7.328117,\n \"stddev_ts\": 0.007605,\n \"samples_ns\": [ 69942035492, 69864632108, 69797119183 ],\n \"samples_ts\": [ 7.32035, 7.32846, 7.33555 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:52:21Z", - "avg_ns": 5441612002, - "stddev_ns": 21249385, - "avg_ts": 23.52268, - "stddev_ts": 0.091677, - "samples_ns": [ - 5425759684, - 5465757086, - 5433319238 - ], - "samples_ts": [ - 23.5912, - 23.4185, - 23.5583 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T01:52:43Z", - "avg_ns": 69867928927, - "stddev_ns": 72515347, - "avg_ts": 7.328117, - "stddev_ts": 0.007605, - "samples_ns": [ - 69942035492, - 69864632108, - 69797119183 - ], - "samples_ts": [ - 7.32035, - 7.32846, - 7.33555 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 297 - }, - { - "timestamp_utc": "2025-12-09T01:58:37.197744+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:56:14Z\",\n \"avg_ns\": 22349753355,\n \"stddev_ns\": 52951143,\n \"avg_ts\": 22.908616,\n \"stddev_ts\": 0.054338,\n \"samples_ns\": [ 22370040889, 22289658480, 22389560697 ],\n \"samples_ts\": [ 22.8878, 22.9703, 22.8678 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:57:43Z\",\n \"avg_ns\": 17847171829,\n \"stddev_ns\": 66459069,\n \"avg_ts\": 7.172071,\n \"stddev_ts\": 0.026663,\n \"samples_ns\": [ 17824752867, 17794821773, 17921940849 ],\n \"samples_ts\": [ 7.18103, 7.1931, 7.14208 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:56:14Z", - "avg_ns": 22349753355, - "stddev_ns": 52951143, - "avg_ts": 22.908616, - "stddev_ts": 0.054338, - "samples_ns": [ - 22370040889, - 22289658480, - 22389560697 - ], - "samples_ts": [ - 22.8878, - 22.9703, - 22.8678 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T01:57:43Z", - "avg_ns": 17847171829, - "stddev_ns": 66459069, - "avg_ts": 7.172071, - "stddev_ts": 0.026663, - "samples_ns": [ - 17824752867, - 17794821773, - 17921940849 - ], - "samples_ts": [ - 7.18103, - 7.1931, - 7.14208 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 298 - }, - { - "timestamp_utc": "2025-12-09T02:03:35.292598+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:58:37Z\",\n \"avg_ns\": 22228778688,\n \"stddev_ns\": 38267675,\n \"avg_ts\": 23.033250,\n \"stddev_ts\": 0.039616,\n \"samples_ns\": [ 22202074767, 22272619834, 22211641463 ],\n \"samples_ts\": [ 23.0609, 22.9879, 23.051 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:00:06Z\",\n \"avg_ns\": 69411670427,\n \"stddev_ns\": 177295264,\n \"avg_ts\": 7.376313,\n \"stddev_ts\": 0.018851,\n \"samples_ns\": [ 69575856792, 69435481304, 69223673186 ],\n \"samples_ts\": [ 7.35887, 7.37375, 7.39631 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T01:58:37Z", - "avg_ns": 22228778688, - "stddev_ns": 38267675, - "avg_ts": 23.03325, - "stddev_ts": 0.039616, - "samples_ns": [ - 22202074767, - 22272619834, - 22211641463 - ], - "samples_ts": [ - 23.0609, - 22.9879, - 23.051 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T02:00:06Z", - "avg_ns": 69411670427, - "stddev_ns": 177295264, - "avg_ts": 7.376313, - "stddev_ts": 0.018851, - "samples_ns": [ - 69575856792, - 69435481304, - 69223673186 - ], - "samples_ts": [ - 7.35887, - 7.37375, - 7.39631 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 299 - }, - { - "timestamp_utc": "2025-12-09T02:04:49.545715+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:03:35Z\",\n \"avg_ns\": 5491879627,\n \"stddev_ns\": 19201642,\n \"avg_ts\": 23.307329,\n \"stddev_ts\": 0.081618,\n \"samples_ns\": [ 5498372323, 5470273657, 5506992903 ],\n \"samples_ts\": [ 23.2796, 23.3992, 23.2432 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:03:57Z\",\n \"avg_ns\": 17135665562,\n \"stddev_ns\": 8133801,\n \"avg_ts\": 7.469801,\n \"stddev_ts\": 0.003547,\n \"samples_ns\": [ 17140703712, 17126281976, 17140010998 ],\n \"samples_ts\": [ 7.4676, 7.47389, 7.46791 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T02:03:35Z", - "avg_ns": 5491879627, - "stddev_ns": 19201642, - "avg_ts": 23.307329, - "stddev_ts": 0.081618, - "samples_ns": [ - 5498372323, - 5470273657, - 5506992903 - ], - "samples_ts": [ - 23.2796, - 23.3992, - 23.2432 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T02:03:57Z", - "avg_ns": 17135665562, - "stddev_ns": 8133801, - "avg_ts": 7.469801, - "stddev_ts": 0.003547, - "samples_ns": [ - 17140703712, - 17126281976, - 17140010998 - ], - "samples_ts": [ - 7.4676, - 7.47389, - 7.46791 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 300 - }, - { - "timestamp_utc": "2025-12-09T02:08:39.487322+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:04:50Z\",\n \"avg_ns\": 5437534907,\n \"stddev_ns\": 14029882,\n \"avg_ts\": 23.540183,\n \"stddev_ts\": 0.060703,\n \"samples_ns\": [ 5452509448, 5435401136, 5424694137 ],\n \"samples_ts\": [ 23.4754, 23.5493, 23.5958 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:05:11Z\",\n \"avg_ns\": 69108086638,\n \"stddev_ns\": 105934511,\n \"avg_ts\": 7.408696,\n \"stddev_ts\": 0.011349,\n \"samples_ns\": [ 69226767875, 69023090734, 69074401305 ],\n \"samples_ts\": [ 7.39598, 7.41781, 7.4123 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T02:04:50Z", - "avg_ns": 5437534907, - "stddev_ns": 14029882, - "avg_ts": 23.540183, - "stddev_ts": 0.060703, - "samples_ns": [ - 5452509448, - 5435401136, - 5424694137 - ], - "samples_ts": [ - 23.4754, - 23.5493, - 23.5958 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T02:05:11Z", - "avg_ns": 69108086638, - "stddev_ns": 105934511, - "avg_ts": 7.408696, - "stddev_ts": 0.011349, - "samples_ns": [ - 69226767875, - 69023090734, - 69074401305 - ], - "samples_ts": [ - 7.39598, - 7.41781, - 7.4123 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 301 - }, - { - "timestamp_utc": "2025-12-09T02:10:58.851091+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:08:40Z\",\n \"avg_ns\": 21744075752,\n \"stddev_ns\": 10783467,\n \"avg_ts\": 23.546647,\n \"stddev_ts\": 0.011672,\n \"samples_ns\": [ 21756521900, 21737608264, 21738097094 ],\n \"samples_ts\": [ 23.5332, 23.5536, 23.5531 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:10:07Z\",\n \"avg_ns\": 17173675345,\n \"stddev_ns\": 60354809,\n \"avg_ts\": 7.453329,\n \"stddev_ts\": 0.026247,\n \"samples_ns\": [ 17208124878, 17103985439, 17208915720 ],\n \"samples_ts\": [ 7.43835, 7.48364, 7.438 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T02:08:40Z", - "avg_ns": 21744075752, - "stddev_ns": 10783467, - "avg_ts": 23.546647, - "stddev_ts": 0.011672, - "samples_ns": [ - 21756521900, - 21737608264, - 21738097094 - ], - "samples_ts": [ - 23.5332, - 23.5536, - 23.5531 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T02:10:07Z", - "avg_ns": 17173675345, - "stddev_ns": 60354809, - "avg_ts": 7.453329, - "stddev_ts": 0.026247, - "samples_ns": [ - 17208124878, - 17103985439, - 17208915720 - ], - "samples_ts": [ - 7.43835, - 7.48364, - 7.438 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 302 - }, - { - "timestamp_utc": "2025-12-09T02:15:54.647282+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:10:59Z\",\n \"avg_ns\": 21876602999,\n \"stddev_ns\": 42455421,\n \"avg_ts\": 23.404058,\n \"stddev_ts\": 0.045369,\n \"samples_ns\": [ 21853143788, 21851054059, 21925611151 ],\n \"samples_ts\": [ 23.4291, 23.4314, 23.3517 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:12:27Z\",\n \"avg_ns\": 69146988154,\n \"stddev_ns\": 59647915,\n \"avg_ts\": 7.404520,\n \"stddev_ts\": 0.006388,\n \"samples_ns\": [ 69204821582, 69085677849, 69150465031 ],\n \"samples_ts\": [ 7.39833, 7.41109, 7.40414 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T02:10:59Z", - "avg_ns": 21876602999, - "stddev_ns": 42455421, - "avg_ts": 23.404058, - "stddev_ts": 0.045369, - "samples_ns": [ - 21853143788, - 21851054059, - 21925611151 - ], - "samples_ts": [ - 23.4291, - 23.4314, - 23.3517 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T02:12:27Z", - "avg_ns": 69146988154, - "stddev_ns": 59647915, - "avg_ts": 7.40452, - "stddev_ts": 0.006388, - "samples_ns": [ - 69204821582, - 69085677849, - 69150465031 - ], - "samples_ts": [ - 7.39833, - 7.41109, - 7.40414 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 303 - }, - { - "timestamp_utc": "2025-12-09T02:17:08.762228+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:15:55Z\",\n \"avg_ns\": 5441686361,\n \"stddev_ns\": 7417940,\n \"avg_ts\": 23.522149,\n \"stddev_ts\": 0.032071,\n \"samples_ns\": [ 5448711974, 5442416950, 5433930159 ],\n \"samples_ts\": [ 23.4918, 23.519, 23.5557 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:16:17Z\",\n \"avg_ns\": 17145313606,\n \"stddev_ns\": 28792557,\n \"avg_ts\": 7.465611,\n \"stddev_ts\": 0.012545,\n \"samples_ns\": [ 17169121723, 17113312494, 17153506601 ],\n \"samples_ts\": [ 7.45524, 7.47956, 7.46203 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T02:15:55Z", - "avg_ns": 5441686361, - "stddev_ns": 7417940, - "avg_ts": 23.522149, - "stddev_ts": 0.032071, - "samples_ns": [ - 5448711974, - 5442416950, - 5433930159 - ], - "samples_ts": [ - 23.4918, - 23.519, - 23.5557 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T02:16:17Z", - "avg_ns": 17145313606, - "stddev_ns": 28792557, - "avg_ts": 7.465611, - "stddev_ts": 0.012545, - "samples_ns": [ - 17169121723, - 17113312494, - 17153506601 - ], - "samples_ts": [ - 7.45524, - 7.47956, - 7.46203 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 304 - }, - { - "timestamp_utc": "2025-12-09T02:20:58.966324+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:17:09Z\",\n \"avg_ns\": 5409903228,\n \"stddev_ns\": 17532996,\n \"avg_ts\": 23.660478,\n \"stddev_ts\": 0.076810,\n \"samples_ns\": [ 5417206189, 5389899193, 5422604302 ],\n \"samples_ts\": [ 23.6284, 23.7481, 23.6049 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:17:31Z\",\n \"avg_ns\": 69213787097,\n \"stddev_ns\": 35911697,\n \"avg_ts\": 7.397371,\n \"stddev_ts\": 0.003837,\n \"samples_ns\": [ 69252288920, 69181205062, 69207867311 ],\n \"samples_ts\": [ 7.39326, 7.40085, 7.398 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T02:17:09Z", - "avg_ns": 5409903228, - "stddev_ns": 17532996, - "avg_ts": 23.660478, - "stddev_ts": 0.07681, - "samples_ns": [ - 5417206189, - 5389899193, - 5422604302 - ], - "samples_ts": [ - 23.6284, - 23.7481, - 23.6049 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T02:17:31Z", - "avg_ns": 69213787097, - "stddev_ns": 35911697, - "avg_ts": 7.397371, - "stddev_ts": 0.003837, - "samples_ns": [ - 69252288920, - 69181205062, - 69207867311 - ], - "samples_ts": [ - 7.39326, - 7.40085, - 7.398 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 305 - }, - { - "timestamp_utc": "2025-12-09T02:23:19.015608+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:20:59Z\",\n \"avg_ns\": 21933955245,\n \"stddev_ns\": 48787110,\n \"avg_ts\": 23.342880,\n \"stddev_ts\": 0.051953,\n \"samples_ns\": [ 21977368114, 21881158130, 21943339493 ],\n \"samples_ts\": [ 23.2967, 23.3991, 23.3328 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:22:27Z\",\n \"avg_ns\": 17152618675,\n \"stddev_ns\": 57581974,\n \"avg_ts\": 7.462473,\n \"stddev_ts\": 0.025022,\n \"samples_ns\": [ 17138174204, 17216047687, 17103634134 ],\n \"samples_ts\": [ 7.46871, 7.43492, 7.48379 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T02:20:59Z", - "avg_ns": 21933955245, - "stddev_ns": 48787110, - "avg_ts": 23.34288, - "stddev_ts": 0.051953, - "samples_ns": [ - 21977368114, - 21881158130, - 21943339493 - ], - "samples_ts": [ - 23.2967, - 23.3991, - 23.3328 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T02:22:27Z", - "avg_ns": 17152618675, - "stddev_ns": 57581974, - "avg_ts": 7.462473, - "stddev_ts": 0.025022, - "samples_ns": [ - 17138174204, - 17216047687, - 17103634134 - ], - "samples_ts": [ - 7.46871, - 7.43492, - 7.48379 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 306 - }, - { - "timestamp_utc": "2025-12-09T02:28:15.970939+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:23:19Z\",\n \"avg_ns\": 21912516313,\n \"stddev_ns\": 52541022,\n \"avg_ts\": 23.365731,\n \"stddev_ts\": 0.056096,\n \"samples_ns\": [ 21949815515, 21852428899, 21935304527 ],\n \"samples_ts\": [ 23.3259, 23.4299, 23.3414 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:24:47Z\",\n \"avg_ns\": 69480181418,\n \"stddev_ns\": 51536861,\n \"avg_ts\": 7.369011,\n \"stddev_ts\": 0.005464,\n \"samples_ns\": [ 69438988856, 69463585110, 69537970290 ],\n \"samples_ts\": [ 7.37338, 7.37077, 7.36288 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T02:23:19Z", - "avg_ns": 21912516313, - "stddev_ns": 52541022, - "avg_ts": 23.365731, - "stddev_ts": 0.056096, - "samples_ns": [ - 21949815515, - 21852428899, - 21935304527 - ], - "samples_ts": [ - 23.3259, - 23.4299, - 23.3414 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T02:24:47Z", - "avg_ns": 69480181418, - "stddev_ns": 51536861, - "avg_ts": 7.369011, - "stddev_ts": 0.005464, - "samples_ns": [ - 69438988856, - 69463585110, - 69537970290 - ], - "samples_ts": [ - 7.37338, - 7.37077, - 7.36288 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 307 - }, - { - "timestamp_utc": "2025-12-09T02:29:29.861030+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:28:16Z\",\n \"avg_ns\": 5417626714,\n \"stddev_ns\": 18001373,\n \"avg_ts\": 23.626755,\n \"stddev_ts\": 0.078364,\n \"samples_ns\": [ 5409475588, 5405143052, 5438261504 ],\n \"samples_ts\": [ 23.6622, 23.6811, 23.5369 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:28:38Z\",\n \"avg_ns\": 17116637624,\n \"stddev_ns\": 49580069,\n \"avg_ts\": 7.478146,\n \"stddev_ts\": 0.021681,\n \"samples_ns\": [ 17062526125, 17127503616, 17159883133 ],\n \"samples_ts\": [ 7.50182, 7.47336, 7.45926 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T02:28:16Z", - "avg_ns": 5417626714, - "stddev_ns": 18001373, - "avg_ts": 23.626755, - "stddev_ts": 0.078364, - "samples_ns": [ - 5409475588, - 5405143052, - 5438261504 - ], - "samples_ts": [ - 23.6622, - 23.6811, - 23.5369 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T02:28:38Z", - "avg_ns": 17116637624, - "stddev_ns": 49580069, - "avg_ts": 7.478146, - "stddev_ts": 0.021681, - "samples_ns": [ - 17062526125, - 17127503616, - 17159883133 - ], - "samples_ts": [ - 7.50182, - 7.47336, - 7.45926 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 308 - }, - { - "timestamp_utc": "2025-12-09T02:33:19.934164+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:29:30Z\",\n \"avg_ns\": 5435340276,\n \"stddev_ns\": 4925420,\n \"avg_ts\": 23.549596,\n \"stddev_ts\": 0.021349,\n \"samples_ns\": [ 5436973451, 5429805710, 5439241667 ],\n \"samples_ts\": [ 23.5425, 23.5736, 23.5327 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:29:52Z\",\n \"avg_ns\": 69147722330,\n \"stddev_ns\": 91690718,\n \"avg_ts\": 7.404446,\n \"stddev_ts\": 0.009824,\n \"samples_ns\": [ 69220457468, 69044727069, 69177982455 ],\n \"samples_ts\": [ 7.39666, 7.41548, 7.4012 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T02:29:30Z", - "avg_ns": 5435340276, - "stddev_ns": 4925420, - "avg_ts": 23.549596, - "stddev_ts": 0.021349, - "samples_ns": [ - 5436973451, - 5429805710, - 5439241667 - ], - "samples_ts": [ - 23.5425, - 23.5736, - 23.5327 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T02:29:52Z", - "avg_ns": 69147722330, - "stddev_ns": 91690718, - "avg_ts": 7.404446, - "stddev_ts": 0.009824, - "samples_ns": [ - 69220457468, - 69044727069, - 69177982455 - ], - "samples_ts": [ - 7.39666, - 7.41548, - 7.4012 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 309 - }, - { - "timestamp_utc": "2025-12-09T02:35:41.187096+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:33:20Z\",\n \"avg_ns\": 22218491213,\n \"stddev_ns\": 40379238,\n \"avg_ts\": 23.043920,\n \"stddev_ts\": 0.041842,\n \"samples_ns\": [ 22187850199, 22203376797, 22264246645 ],\n \"samples_ts\": [ 23.0757, 23.0596, 22.9965 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:34:49Z\",\n \"avg_ns\": 17120812830,\n \"stddev_ns\": 35767243,\n \"avg_ts\": 7.476302,\n \"stddev_ts\": 0.015602,\n \"samples_ns\": [ 17161686854, 17095250952, 17105500685 ],\n \"samples_ts\": [ 7.45847, 7.48746, 7.48297 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T02:33:20Z", - "avg_ns": 22218491213, - "stddev_ns": 40379238, - "avg_ts": 23.04392, - "stddev_ts": 0.041842, - "samples_ns": [ - 22187850199, - 22203376797, - 22264246645 - ], - "samples_ts": [ - 23.0757, - 23.0596, - 22.9965 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T02:34:49Z", - "avg_ns": 17120812830, - "stddev_ns": 35767243, - "avg_ts": 7.476302, - "stddev_ts": 0.015602, - "samples_ns": [ - 17161686854, - 17095250952, - 17105500685 - ], - "samples_ts": [ - 7.45847, - 7.48746, - 7.48297 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 310 - }, - { - "timestamp_utc": "2025-12-09T02:40:38.103212+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:35:41Z\",\n \"avg_ns\": 22167688827,\n \"stddev_ns\": 53458761,\n \"avg_ts\": 23.096769,\n \"stddev_ts\": 0.055749,\n \"samples_ns\": [ 22182025170, 22108523839, 22212517473 ],\n \"samples_ts\": [ 23.0818, 23.1585, 23.0501 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:37:10Z\",\n \"avg_ns\": 69102126000,\n \"stddev_ns\": 129544186,\n \"avg_ts\": 7.409341,\n \"stddev_ts\": 0.013905,\n \"samples_ns\": [ 69179220556, 69174591696, 68952565750 ],\n \"samples_ts\": [ 7.40107, 7.40156, 7.42539 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T02:35:41Z", - "avg_ns": 22167688827, - "stddev_ns": 53458761, - "avg_ts": 23.096769, - "stddev_ts": 0.055749, - "samples_ns": [ - 22182025170, - 22108523839, - 22212517473 - ], - "samples_ts": [ - 23.0818, - 23.1585, - 23.0501 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T02:37:10Z", - "avg_ns": 69102126000, - "stddev_ns": 129544186, - "avg_ts": 7.409341, - "stddev_ts": 0.013905, - "samples_ns": [ - 69179220556, - 69174591696, - 68952565750 - ], - "samples_ts": [ - 7.40107, - 7.40156, - 7.42539 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 311 - }, - { - "timestamp_utc": "2025-12-09T02:41:51.997942+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:40:38Z\",\n \"avg_ns\": 5422209357,\n \"stddev_ns\": 3420346,\n \"avg_ts\": 23.606620,\n \"stddev_ts\": 0.014894,\n \"samples_ns\": [ 5418524964, 5422819602, 5425283505 ],\n \"samples_ts\": [ 23.6227, 23.604, 23.5932 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:41:00Z\",\n \"avg_ns\": 17107041354,\n \"stddev_ns\": 37088714,\n \"avg_ts\": 7.482323,\n \"stddev_ts\": 0.016202,\n \"samples_ns\": [ 17088953727, 17082466705, 17149703630 ],\n \"samples_ts\": [ 7.49022, 7.49306, 7.46369 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T02:40:38Z", - "avg_ns": 5422209357, - "stddev_ns": 3420346, - "avg_ts": 23.60662, - "stddev_ts": 0.014894, - "samples_ns": [ - 5418524964, - 5422819602, - 5425283505 - ], - "samples_ts": [ - 23.6227, - 23.604, - 23.5932 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T02:41:00Z", - "avg_ns": 17107041354, - "stddev_ns": 37088714, - "avg_ts": 7.482323, - "stddev_ts": 0.016202, - "samples_ns": [ - 17088953727, - 17082466705, - 17149703630 - ], - "samples_ts": [ - 7.49022, - 7.49306, - 7.46369 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 312 - }, - { - "timestamp_utc": "2025-12-09T02:45:42.423008+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:41:52Z\",\n \"avg_ns\": 5437426252,\n \"stddev_ns\": 7546827,\n \"avg_ts\": 23.540579,\n \"stddev_ts\": 0.032647,\n \"samples_ns\": [ 5431814495, 5434459363, 5446004900 ],\n \"samples_ts\": [ 23.5649, 23.5534, 23.5035 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:42:14Z\",\n \"avg_ns\": 69245291274,\n \"stddev_ns\": 129772115,\n \"avg_ts\": 7.394022,\n \"stddev_ts\": 0.013845,\n \"samples_ns\": [ 69201885248, 69142786393, 69391202183 ],\n \"samples_ts\": [ 7.39864, 7.40497, 7.37846 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T02:41:52Z", - "avg_ns": 5437426252, - "stddev_ns": 7546827, - "avg_ts": 23.540579, - "stddev_ts": 0.032647, - "samples_ns": [ - 5431814495, - 5434459363, - 5446004900 - ], - "samples_ts": [ - 23.5649, - 23.5534, - 23.5035 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T02:42:14Z", - "avg_ns": 69245291274, - "stddev_ns": 129772115, - "avg_ts": 7.394022, - "stddev_ts": 0.013845, - "samples_ns": [ - 69201885248, - 69142786393, - 69391202183 - ], - "samples_ts": [ - 7.39864, - 7.40497, - 7.37846 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 313 - }, - { - "timestamp_utc": "2025-12-09T02:48:02.466907+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:45:43Z\",\n \"avg_ns\": 21918671330,\n \"stddev_ns\": 40848060,\n \"avg_ts\": 23.359134,\n \"stddev_ts\": 0.043576,\n \"samples_ns\": [ 21946713309, 21871805589, 21937495093 ],\n \"samples_ts\": [ 23.3292, 23.4091, 23.339 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:47:10Z\",\n \"avg_ns\": 17157168665,\n \"stddev_ns\": 73028297,\n \"avg_ts\": 7.460528,\n \"stddev_ts\": 0.031681,\n \"samples_ns\": [ 17122539125, 17107897316, 17241069555 ],\n \"samples_ts\": [ 7.47553, 7.48192, 7.42413 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T02:45:43Z", - "avg_ns": 21918671330, - "stddev_ns": 40848060, - "avg_ts": 23.359134, - "stddev_ts": 0.043576, - "samples_ns": [ - 21946713309, - 21871805589, - 21937495093 - ], - "samples_ts": [ - 23.3292, - 23.4091, - 23.339 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T02:47:10Z", - "avg_ns": 17157168665, - "stddev_ns": 73028297, - "avg_ts": 7.460528, - "stddev_ts": 0.031681, - "samples_ns": [ - 17122539125, - 17107897316, - 17241069555 - ], - "samples_ts": [ - 7.47553, - 7.48192, - 7.42413 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 314 - }, - { - "timestamp_utc": "2025-12-09T02:52:58.120240+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:48:03Z\",\n \"avg_ns\": 21789629015,\n \"stddev_ns\": 34451824,\n \"avg_ts\": 23.497456,\n \"stddev_ts\": 0.037136,\n \"samples_ns\": [ 21758917734, 21783086727, 21826882585 ],\n \"samples_ts\": [ 23.5306, 23.5045, 23.4573 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:49:30Z\",\n \"avg_ns\": 69171232961,\n \"stddev_ns\": 62907730,\n \"avg_ts\": 7.401925,\n \"stddev_ts\": 0.006728,\n \"samples_ns\": [ 69130352525, 69243671609, 69139674751 ],\n \"samples_ts\": [ 7.4063, 7.39418, 7.4053 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T02:48:03Z", - "avg_ns": 21789629015, - "stddev_ns": 34451824, - "avg_ts": 23.497456, - "stddev_ts": 0.037136, - "samples_ns": [ - 21758917734, - 21783086727, - 21826882585 - ], - "samples_ts": [ - 23.5306, - 23.5045, - 23.4573 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T02:49:30Z", - "avg_ns": 69171232961, - "stddev_ns": 62907730, - "avg_ts": 7.401925, - "stddev_ts": 0.006728, - "samples_ns": [ - 69130352525, - 69243671609, - 69139674751 - ], - "samples_ts": [ - 7.4063, - 7.39418, - 7.4053 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 315 - }, - { - "timestamp_utc": "2025-12-09T02:54:12.111719+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:52:58Z\",\n \"avg_ns\": 5418292998,\n \"stddev_ns\": 32443181,\n \"avg_ts\": 23.624240,\n \"stddev_ts\": 0.141137,\n \"samples_ns\": [ 5409486415, 5391162463, 5454230118 ],\n \"samples_ts\": [ 23.6621, 23.7426, 23.468 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:53:20Z\",\n \"avg_ns\": 17133879458,\n \"stddev_ns\": 38715297,\n \"avg_ts\": 7.470604,\n \"stddev_ts\": 0.016883,\n \"samples_ns\": [ 17171546907, 17094195974, 17135895495 ],\n \"samples_ts\": [ 7.45419, 7.48792, 7.4697 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T02:52:58Z", - "avg_ns": 5418292998, - "stddev_ns": 32443181, - "avg_ts": 23.62424, - "stddev_ts": 0.141137, - "samples_ns": [ - 5409486415, - 5391162463, - 5454230118 - ], - "samples_ts": [ - 23.6621, - 23.7426, - 23.468 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T02:53:20Z", - "avg_ns": 17133879458, - "stddev_ns": 38715297, - "avg_ts": 7.470604, - "stddev_ts": 0.016883, - "samples_ns": [ - 17171546907, - 17094195974, - 17135895495 - ], - "samples_ts": [ - 7.45419, - 7.48792, - 7.4697 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 316 - }, - { - "timestamp_utc": "2025-12-09T02:58:03.221713+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:54:12Z\",\n \"avg_ns\": 5422059170,\n \"stddev_ns\": 6608069,\n \"avg_ts\": 23.607291,\n \"stddev_ts\": 0.028788,\n \"samples_ns\": [ 5425877274, 5425870460, 5414429778 ],\n \"samples_ts\": [ 23.5907, 23.5907, 23.6405 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:54:34Z\",\n \"avg_ns\": 69495469031,\n \"stddev_ns\": 88338230,\n \"avg_ts\": 7.367395,\n \"stddev_ts\": 0.009369,\n \"samples_ns\": [ 69572867839, 69514307202, 69399232054 ],\n \"samples_ts\": [ 7.35919, 7.36539, 7.3776 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T02:54:12Z", - "avg_ns": 5422059170, - "stddev_ns": 6608069, - "avg_ts": 23.607291, - "stddev_ts": 0.028788, - "samples_ns": [ - 5425877274, - 5425870460, - 5414429778 - ], - "samples_ts": [ - 23.5907, - 23.5907, - 23.6405 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T02:54:34Z", - "avg_ns": 69495469031, - "stddev_ns": 88338230, - "avg_ts": 7.367395, - "stddev_ts": 0.009369, - "samples_ns": [ - 69572867839, - 69514307202, - 69399232054 - ], - "samples_ts": [ - 7.35919, - 7.36539, - 7.3776 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 317 - }, - { - "timestamp_utc": "2025-12-09T03:00:25.152017+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:58:03Z\",\n \"avg_ns\": 22032544094,\n \"stddev_ns\": 8793195,\n \"avg_ts\": 23.238354,\n \"stddev_ts\": 0.009275,\n \"samples_ns\": [ 22036466653, 22022473659, 22038691971 ],\n \"samples_ts\": [ 23.2342, 23.249, 23.2319 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:59:32Z\",\n \"avg_ns\": 17621411430,\n \"stddev_ns\": 50760253,\n \"avg_ts\": 7.263931,\n \"stddev_ts\": 0.020954,\n \"samples_ns\": [ 17563712487, 17659187943, 17641333862 ],\n \"samples_ts\": [ 7.28775, 7.24835, 7.25569 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T02:58:03Z", - "avg_ns": 22032544094, - "stddev_ns": 8793195, - "avg_ts": 23.238354, - "stddev_ts": 0.009275, - "samples_ns": [ - 22036466653, - 22022473659, - 22038691971 - ], - "samples_ts": [ - 23.2342, - 23.249, - 23.2319 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T02:59:32Z", - "avg_ns": 17621411430, - "stddev_ns": 50760253, - "avg_ts": 7.263931, - "stddev_ts": 0.020954, - "samples_ns": [ - 17563712487, - 17659187943, - 17641333862 - ], - "samples_ts": [ - 7.28775, - 7.24835, - 7.25569 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 318 - }, - { - "timestamp_utc": "2025-12-09T03:05:27.827586+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:00:25Z\",\n \"avg_ns\": 21946390380,\n \"stddev_ns\": 54194714,\n \"avg_ts\": 23.329672,\n \"stddev_ts\": 0.057553,\n \"samples_ns\": [ 22006760391, 21930474908, 21901935843 ],\n \"samples_ts\": [ 23.2656, 23.3465, 23.3769 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:01:53Z\",\n \"avg_ns\": 71326476398,\n \"stddev_ns\": 132350628,\n \"avg_ts\": 7.178277,\n \"stddev_ts\": 0.013314,\n \"samples_ns\": [ 71304952006, 71468269942, 71206207246 ],\n \"samples_ts\": [ 7.18043, 7.16402, 7.19038 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:00:25Z", - "avg_ns": 21946390380, - "stddev_ns": 54194714, - "avg_ts": 23.329672, - "stddev_ts": 0.057553, - "samples_ns": [ - 22006760391, - 21930474908, - 21901935843 - ], - "samples_ts": [ - 23.2656, - 23.3465, - 23.3769 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T03:01:53Z", - "avg_ns": 71326476398, - "stddev_ns": 132350628, - "avg_ts": 7.178277, - "stddev_ts": 0.013314, - "samples_ns": [ - 71304952006, - 71468269942, - 71206207246 - ], - "samples_ts": [ - 7.18043, - 7.16402, - 7.19038 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 319 - }, - { - "timestamp_utc": "2025-12-09T03:06:41.797796+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:05:28Z\",\n \"avg_ns\": 5440231550,\n \"stddev_ns\": 6727269,\n \"avg_ts\": 23.528434,\n \"stddev_ts\": 0.029112,\n \"samples_ns\": [ 5444005357, 5432465527, 5444223768 ],\n \"samples_ts\": [ 23.5121, 23.562, 23.5112 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:05:50Z\",\n \"avg_ns\": 17100702768,\n \"stddev_ns\": 4059258,\n \"avg_ts\": 7.485073,\n \"stddev_ts\": 0.001776,\n \"samples_ns\": [ 17105380881, 17098580337, 17098147087 ],\n \"samples_ts\": [ 7.48303, 7.486, 7.48619 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:05:28Z", - "avg_ns": 5440231550, - "stddev_ns": 6727269, - "avg_ts": 23.528434, - "stddev_ts": 0.029112, - "samples_ns": [ - 5444005357, - 5432465527, - 5444223768 - ], - "samples_ts": [ - 23.5121, - 23.562, - 23.5112 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T03:05:50Z", - "avg_ns": 17100702768, - "stddev_ns": 4059258, - "avg_ts": 7.485073, - "stddev_ts": 0.001776, - "samples_ns": [ - 17105380881, - 17098580337, - 17098147087 - ], - "samples_ts": [ - 7.48303, - 7.486, - 7.48619 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 320 - }, - { - "timestamp_utc": "2025-12-09T03:10:32.106218+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:06:42Z\",\n \"avg_ns\": 5424553836,\n \"stddev_ns\": 25314989,\n \"avg_ts\": 23.596752,\n \"stddev_ts\": 0.109831,\n \"samples_ns\": [ 5411853817, 5408103070, 5453704622 ],\n \"samples_ts\": [ 23.6518, 23.6682, 23.4703 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:07:04Z\",\n \"avg_ns\": 69232992955,\n \"stddev_ns\": 62770445,\n \"avg_ts\": 7.395322,\n \"stddev_ts\": 0.006709,\n \"samples_ns\": [ 69269628426, 69268837111, 69160513328 ],\n \"samples_ts\": [ 7.39141, 7.39149, 7.40307 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:06:42Z", - "avg_ns": 5424553836, - "stddev_ns": 25314989, - "avg_ts": 23.596752, - "stddev_ts": 0.109831, - "samples_ns": [ - 5411853817, - 5408103070, - 5453704622 - ], - "samples_ts": [ - 23.6518, - 23.6682, - 23.4703 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T03:07:04Z", - "avg_ns": 69232992955, - "stddev_ns": 62770445, - "avg_ts": 7.395322, - "stddev_ts": 0.006709, - "samples_ns": [ - 69269628426, - 69268837111, - 69160513328 - ], - "samples_ts": [ - 7.39141, - 7.39149, - 7.40307 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 321 - }, - { - "timestamp_utc": "2025-12-09T03:12:54.764031+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:10:32Z\",\n \"avg_ns\": 22212005623,\n \"stddev_ns\": 13068531,\n \"avg_ts\": 23.050603,\n \"stddev_ts\": 0.013564,\n \"samples_ns\": [ 22223705446, 22197902099, 22214409324 ],\n \"samples_ts\": [ 23.0385, 23.0652, 23.0481 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:12:01Z\",\n \"avg_ns\": 17637641073,\n \"stddev_ns\": 42106913,\n \"avg_ts\": 7.257234,\n \"stddev_ts\": 0.017329,\n \"samples_ns\": [ 17678349438, 17640311094, 17594262687 ],\n \"samples_ts\": [ 7.24049, 7.25611, 7.2751 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:10:32Z", - "avg_ns": 22212005623, - "stddev_ns": 13068531, - "avg_ts": 23.050603, - "stddev_ts": 0.013564, - "samples_ns": [ - 22223705446, - 22197902099, - 22214409324 - ], - "samples_ts": [ - 23.0385, - 23.0652, - 23.0481 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T03:12:01Z", - "avg_ns": 17637641073, - "stddev_ns": 42106913, - "avg_ts": 7.257234, - "stddev_ts": 0.017329, - "samples_ns": [ - 17678349438, - 17640311094, - 17594262687 - ], - "samples_ts": [ - 7.24049, - 7.25611, - 7.2751 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 322 - }, - { - "timestamp_utc": "2025-12-09T03:17:52.970046+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:12:55Z\",\n \"avg_ns\": 22302675232,\n \"stddev_ns\": 63369355,\n \"avg_ts\": 22.957011,\n \"stddev_ts\": 0.065291,\n \"samples_ns\": [ 22357114866, 22233112794, 22317798037 ],\n \"samples_ts\": [ 22.901, 23.0287, 22.9413 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:14:24Z\",\n \"avg_ns\": 69328700408,\n \"stddev_ns\": 153147673,\n \"avg_ts\": 7.385133,\n \"stddev_ts\": 0.016293,\n \"samples_ns\": [ 69233085405, 69505339075, 69247676745 ],\n \"samples_ts\": [ 7.39531, 7.36634, 7.39375 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:12:55Z", - "avg_ns": 22302675232, - "stddev_ns": 63369355, - "avg_ts": 22.957011, - "stddev_ts": 0.065291, - "samples_ns": [ - 22357114866, - 22233112794, - 22317798037 - ], - "samples_ts": [ - 22.901, - 23.0287, - 22.9413 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T03:14:24Z", - "avg_ns": 69328700408, - "stddev_ns": 153147673, - "avg_ts": 7.385133, - "stddev_ts": 0.016293, - "samples_ns": [ - 69233085405, - 69505339075, - 69247676745 - ], - "samples_ts": [ - 7.39531, - 7.36634, - 7.39375 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 323 - }, - { - "timestamp_utc": "2025-12-09T03:18:34.233436+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:17:53Z\",\n \"avg_ns\": 2799402321,\n \"stddev_ns\": 4720762,\n \"avg_ts\": 45.724132,\n \"stddev_ts\": 0.077102,\n \"samples_ns\": [ 2799522824, 2804061086, 2794623055 ],\n \"samples_ts\": [ 45.7221, 45.6481, 45.8022 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:18:04Z\",\n \"avg_ns\": 9741254081,\n \"stddev_ns\": 19415595,\n \"avg_ts\": 13.140027,\n \"stddev_ts\": 0.026216,\n \"samples_ns\": [ 9719129690, 9755453007, 9749179547 ],\n \"samples_ts\": [ 13.1699, 13.1209, 13.1293 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:17:53Z", - "avg_ns": 2799402321, - "stddev_ns": 4720762, - "avg_ts": 45.724132, - "stddev_ts": 0.077102, - "samples_ns": [ - 2799522824, - 2804061086, - 2794623055 - ], - "samples_ts": [ - 45.7221, - 45.6481, - 45.8022 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T03:18:04Z", - "avg_ns": 9741254081, - "stddev_ns": 19415595, - "avg_ts": 13.140027, - "stddev_ts": 0.026216, - "samples_ns": [ - 9719129690, - 9755453007, - 9749179547 - ], - "samples_ts": [ - 13.1699, - 13.1209, - 13.1293 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 324 - }, - { - "timestamp_utc": "2025-12-09T03:20:39.345570+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:18:34Z\",\n \"avg_ns\": 2781984056,\n \"stddev_ns\": 7356090,\n \"avg_ts\": 46.010543,\n \"stddev_ts\": 0.121503,\n \"samples_ns\": [ 2790317317, 2776393745, 2779241107 ],\n \"samples_ts\": [ 45.8729, 46.103, 46.0557 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:18:46Z\",\n \"avg_ns\": 37710687109,\n \"stddev_ns\": 57104913,\n \"avg_ts\": 13.577074,\n \"stddev_ts\": 0.020543,\n \"samples_ns\": [ 37670312062, 37685726401, 37776022864 ],\n \"samples_ts\": [ 13.5916, 13.586, 13.5536 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:18:34Z", - "avg_ns": 2781984056, - "stddev_ns": 7356090, - "avg_ts": 46.010543, - "stddev_ts": 0.121503, - "samples_ns": [ - 2790317317, - 2776393745, - 2779241107 - ], - "samples_ts": [ - 45.8729, - 46.103, - 46.0557 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T03:18:46Z", - "avg_ns": 37710687109, - "stddev_ns": 57104913, - "avg_ts": 13.577074, - "stddev_ts": 0.020543, - "samples_ns": [ - 37670312062, - 37685726401, - 37776022864 - ], - "samples_ts": [ - 13.5916, - 13.586, - 13.5536 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 325 - }, - { - "timestamp_utc": "2025-12-09T03:21:52.676225+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:20:40Z\",\n \"avg_ns\": 11165829052,\n \"stddev_ns\": 12675397,\n \"avg_ts\": 45.854225,\n \"stddev_ts\": 0.052069,\n \"samples_ns\": [ 11177228330, 11152179064, 11168079762 ],\n \"samples_ts\": [ 45.8074, 45.9103, 45.8449 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:21:24Z\",\n \"avg_ns\": 9286262939,\n \"stddev_ns\": 19600070,\n \"avg_ts\": 13.783842,\n \"stddev_ts\": 0.029067,\n \"samples_ns\": [ 9308126657, 9280394368, 9270267793 ],\n \"samples_ts\": [ 13.7514, 13.7925, 13.8076 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:20:40Z", - "avg_ns": 11165829052, - "stddev_ns": 12675397, - "avg_ts": 45.854225, - "stddev_ts": 0.052069, - "samples_ns": [ - 11177228330, - 11152179064, - 11168079762 - ], - "samples_ts": [ - 45.8074, - 45.9103, - 45.8449 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T03:21:24Z", - "avg_ns": 9286262939, - "stddev_ns": 19600070, - "avg_ts": 13.783842, - "stddev_ts": 0.029067, - "samples_ns": [ - 9308126657, - 9280394368, - 9270267793 - ], - "samples_ts": [ - 13.7514, - 13.7925, - 13.8076 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 326 - }, - { - "timestamp_utc": "2025-12-09T03:24:31.237846+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:21:53Z\",\n \"avg_ns\": 11193996950,\n \"stddev_ns\": 23432538,\n \"avg_ts\": 45.738935,\n \"stddev_ts\": 0.095647,\n \"samples_ns\": [ 11185031572, 11220588216, 11176371063 ],\n \"samples_ts\": [ 45.7755, 45.6304, 45.8109 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:22:38Z\",\n \"avg_ns\": 37635494890,\n \"stddev_ns\": 40573775,\n \"avg_ts\": 13.604189,\n \"stddev_ts\": 0.014659,\n \"samples_ns\": [ 37603807759, 37681223240, 37621453673 ],\n \"samples_ts\": [ 13.6156, 13.5877, 13.6093 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:21:53Z", - "avg_ns": 11193996950, - "stddev_ns": 23432538, - "avg_ts": 45.738935, - "stddev_ts": 0.095647, - "samples_ns": [ - 11185031572, - 11220588216, - 11176371063 - ], - "samples_ts": [ - 45.7755, - 45.6304, - 45.8109 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T03:22:38Z", - "avg_ns": 37635494890, - "stddev_ns": 40573775, - "avg_ts": 13.604189, - "stddev_ts": 0.014659, - "samples_ns": [ - 37603807759, - 37681223240, - 37621453673 - ], - "samples_ts": [ - 13.6156, - 13.5877, - 13.6093 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 327 - }, - { - "timestamp_utc": "2025-12-09T03:25:12.467464+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:24:31Z\",\n \"avg_ns\": 2782535225,\n \"stddev_ns\": 8212261,\n \"avg_ts\": 46.001482,\n \"stddev_ts\": 0.135851,\n \"samples_ns\": [ 2790102586, 2773802553, 2783700536 ],\n \"samples_ts\": [ 45.8764, 46.146, 45.982 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:24:43Z\",\n \"avg_ns\": 9759087755,\n \"stddev_ns\": 20287706,\n \"avg_ts\": 13.116018,\n \"stddev_ts\": 0.027268,\n \"samples_ns\": [ 9738542257, 9779106446, 9759614564 ],\n \"samples_ts\": [ 13.1437, 13.0891, 13.1153 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:24:31Z", - "avg_ns": 2782535225, - "stddev_ns": 8212261, - "avg_ts": 46.001482, - "stddev_ts": 0.135851, - "samples_ns": [ - 2790102586, - 2773802553, - 2783700536 - ], - "samples_ts": [ - 45.8764, - 46.146, - 45.982 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T03:24:43Z", - "avg_ns": 9759087755, - "stddev_ns": 20287706, - "avg_ts": 13.116018, - "stddev_ts": 0.027268, - "samples_ns": [ - 9738542257, - 9779106446, - 9759614564 - ], - "samples_ts": [ - 13.1437, - 13.0891, - 13.1153 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 328 - }, - { - "timestamp_utc": "2025-12-09T03:27:17.442412+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:25:13Z\",\n \"avg_ns\": 2781843432,\n \"stddev_ns\": 14191010,\n \"avg_ts\": 46.013451,\n \"stddev_ts\": 0.234188,\n \"samples_ns\": [ 2797814582, 2777031342, 2770684374 ],\n \"samples_ts\": [ 45.75, 46.0924, 46.198 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:25:24Z\",\n \"avg_ns\": 37667884816,\n \"stddev_ns\": 41419518,\n \"avg_ts\": 13.592492,\n \"stddev_ts\": 0.014945,\n \"samples_ns\": [ 37666116484, 37710159725, 37627378240 ],\n \"samples_ts\": [ 13.5931, 13.5772, 13.6071 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:25:13Z", - "avg_ns": 2781843432, - "stddev_ns": 14191010, - "avg_ts": 46.013451, - "stddev_ts": 0.234188, - "samples_ns": [ - 2797814582, - 2777031342, - 2770684374 - ], - "samples_ts": [ - 45.75, - 46.0924, - 46.198 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T03:25:24Z", - "avg_ns": 37667884816, - "stddev_ns": 41419518, - "avg_ts": 13.592492, - "stddev_ts": 0.014945, - "samples_ns": [ - 37666116484, - 37710159725, - 37627378240 - ], - "samples_ts": [ - 13.5931, - 13.5772, - 13.6071 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 329 - }, - { - "timestamp_utc": "2025-12-09T03:28:31.707636+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:27:18Z\",\n \"avg_ns\": 11383241811,\n \"stddev_ns\": 234742889,\n \"avg_ts\": 44.991005,\n \"stddev_ts\": 0.916882,\n \"samples_ns\": [ 11245835772, 11654290817, 11249598845 ],\n \"samples_ts\": [ 45.528, 43.9323, 45.5127 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:28:03Z\",\n \"avg_ns\": 9323097777,\n \"stddev_ns\": 22754685,\n \"avg_ts\": 13.729397,\n \"stddev_ts\": 0.033510,\n \"samples_ns\": [ 9323341205, 9345729363, 9300222765 ],\n \"samples_ts\": [ 13.729, 13.6961, 13.7631 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:27:18Z", - "avg_ns": 11383241811, - "stddev_ns": 234742889, - "avg_ts": 44.991005, - "stddev_ts": 0.916882, - "samples_ns": [ - 11245835772, - 11654290817, - 11249598845 - ], - "samples_ts": [ - 45.528, - 43.9323, - 45.5127 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T03:28:03Z", - "avg_ns": 9323097777, - "stddev_ns": 22754685, - "avg_ts": 13.729397, - "stddev_ts": 0.03351, - "samples_ns": [ - 9323341205, - 9345729363, - 9300222765 - ], - "samples_ts": [ - 13.729, - 13.6961, - 13.7631 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 330 - }, - { - "timestamp_utc": "2025-12-09T03:31:10.793950+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:28:32Z\",\n \"avg_ns\": 11245195136,\n \"stddev_ns\": 24575153,\n \"avg_ts\": 45.530702,\n \"stddev_ts\": 0.099559,\n \"samples_ns\": [ 11267301288, 11249550716, 11218733404 ],\n \"samples_ts\": [ 45.4412, 45.5129, 45.638 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:29:17Z\",\n \"avg_ns\": 37746847080,\n \"stddev_ns\": 47955863,\n \"avg_ts\": 13.564061,\n \"stddev_ts\": 0.017245,\n \"samples_ns\": [ 37691688061, 37770198495, 37778654684 ],\n \"samples_ts\": [ 13.5839, 13.5557, 13.5526 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:28:32Z", - "avg_ns": 11245195136, - "stddev_ns": 24575153, - "avg_ts": 45.530702, - "stddev_ts": 0.099559, - "samples_ns": [ - 11267301288, - 11249550716, - 11218733404 - ], - "samples_ts": [ - 45.4412, - 45.5129, - 45.638 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T03:29:17Z", - "avg_ns": 37746847080, - "stddev_ns": 47955863, - "avg_ts": 13.564061, - "stddev_ts": 0.017245, - "samples_ns": [ - 37691688061, - 37770198495, - 37778654684 - ], - "samples_ts": [ - 13.5839, - 13.5557, - 13.5526 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 331 - }, - { - "timestamp_utc": "2025-12-09T03:31:52.241179+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:31:11Z\",\n \"avg_ns\": 2790173062,\n \"stddev_ns\": 7234749,\n \"avg_ts\": 45.875496,\n \"stddev_ts\": 0.118838,\n \"samples_ns\": [ 2784014958, 2798140298, 2788363932 ],\n \"samples_ts\": [ 45.9768, 45.7447, 45.9051 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:31:22Z\",\n \"avg_ns\": 9807217323,\n \"stddev_ns\": 39545647,\n \"avg_ts\": 13.051754,\n \"stddev_ts\": 0.052623,\n \"samples_ns\": [ 9806475037, 9768048168, 9847128765 ],\n \"samples_ts\": [ 13.0526, 13.1039, 12.9987 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:31:11Z", - "avg_ns": 2790173062, - "stddev_ns": 7234749, - "avg_ts": 45.875496, - "stddev_ts": 0.118838, - "samples_ns": [ - 2784014958, - 2798140298, - 2788363932 - ], - "samples_ts": [ - 45.9768, - 45.7447, - 45.9051 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T03:31:22Z", - "avg_ns": 9807217323, - "stddev_ns": 39545647, - "avg_ts": 13.051754, - "stddev_ts": 0.052623, - "samples_ns": [ - 9806475037, - 9768048168, - 9847128765 - ], - "samples_ts": [ - 13.0526, - 13.1039, - 12.9987 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 332 - }, - { - "timestamp_utc": "2025-12-09T03:33:57.144018+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:31:52Z\",\n \"avg_ns\": 2791588048,\n \"stddev_ns\": 10173851,\n \"avg_ts\": 45.852443,\n \"stddev_ts\": 0.166815,\n \"samples_ns\": [ 2783790450, 2803096037, 2787877659 ],\n \"samples_ts\": [ 45.9805, 45.6638, 45.9131 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:32:04Z\",\n \"avg_ns\": 37640495284,\n \"stddev_ns\": 48200379,\n \"avg_ts\": 13.602386,\n \"stddev_ts\": 0.017406,\n \"samples_ns\": [ 37618268914, 37695798178, 37607418761 ],\n \"samples_ts\": [ 13.6104, 13.5824, 13.6143 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:31:52Z", - "avg_ns": 2791588048, - "stddev_ns": 10173851, - "avg_ts": 45.852443, - "stddev_ts": 0.166815, - "samples_ns": [ - 2783790450, - 2803096037, - 2787877659 - ], - "samples_ts": [ - 45.9805, - 45.6638, - 45.9131 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T03:32:04Z", - "avg_ns": 37640495284, - "stddev_ns": 48200379, - "avg_ts": 13.602386, - "stddev_ts": 0.017406, - "samples_ns": [ - 37618268914, - 37695798178, - 37607418761 - ], - "samples_ts": [ - 13.6104, - 13.5824, - 13.6143 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 333 - }, - { - "timestamp_utc": "2025-12-09T03:35:11.577928+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:33:57Z\",\n \"avg_ns\": 11395636973,\n \"stddev_ns\": 9486622,\n \"avg_ts\": 44.929497,\n \"stddev_ts\": 0.037385,\n \"samples_ns\": [ 11392794173, 11406218728, 11387898020 ],\n \"samples_ts\": [ 44.9407, 44.8878, 44.96 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:34:43Z\",\n \"avg_ns\": 9336588621,\n \"stddev_ns\": 28051850,\n \"avg_ts\": 13.709587,\n \"stddev_ts\": 0.041169,\n \"samples_ns\": [ 9310283188, 9333373225, 9366109451 ],\n \"samples_ts\": [ 13.7482, 13.7142, 13.6663 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:33:57Z", - "avg_ns": 11395636973, - "stddev_ns": 9486622, - "avg_ts": 44.929497, - "stddev_ts": 0.037385, - "samples_ns": [ - 11392794173, - 11406218728, - 11387898020 - ], - "samples_ts": [ - 44.9407, - 44.8878, - 44.96 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T03:34:43Z", - "avg_ns": 9336588621, - "stddev_ns": 28051850, - "avg_ts": 13.709587, - "stddev_ts": 0.041169, - "samples_ns": [ - 9310283188, - 9333373225, - 9366109451 - ], - "samples_ts": [ - 13.7482, - 13.7142, - 13.6663 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 334 - }, - { - "timestamp_utc": "2025-12-09T03:37:50.954944+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:35:12Z\",\n \"avg_ns\": 11347554182,\n \"stddev_ns\": 18599874,\n \"avg_ts\": 45.119936,\n \"stddev_ts\": 0.073891,\n \"samples_ns\": [ 11339398767, 11334425470, 11368838310 ],\n \"samples_ts\": [ 45.1523, 45.1721, 45.0354 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:35:57Z\",\n \"avg_ns\": 37696618311,\n \"stddev_ns\": 62200381,\n \"avg_ts\": 13.582145,\n \"stddev_ts\": 0.022393,\n \"samples_ns\": [ 37767098007, 37673349641, 37649407286 ],\n \"samples_ts\": [ 13.5568, 13.5905, 13.5992 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:35:12Z", - "avg_ns": 11347554182, - "stddev_ns": 18599874, - "avg_ts": 45.119936, - "stddev_ts": 0.073891, - "samples_ns": [ - 11339398767, - 11334425470, - 11368838310 - ], - "samples_ts": [ - 45.1523, - 45.1721, - 45.0354 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T03:35:57Z", - "avg_ns": 37696618311, - "stddev_ns": 62200381, - "avg_ts": 13.582145, - "stddev_ts": 0.022393, - "samples_ns": [ - 37767098007, - 37673349641, - 37649407286 - ], - "samples_ts": [ - 13.5568, - 13.5905, - 13.5992 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 335 - }, - { - "timestamp_utc": "2025-12-09T03:38:30.951548+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:37:51Z\",\n \"avg_ns\": 2804728981,\n \"stddev_ns\": 10198291,\n \"avg_ts\": 45.637610,\n \"stddev_ts\": 0.165691,\n \"samples_ns\": [ 2796481980, 2801572689, 2816132274 ],\n \"samples_ts\": [ 45.7718, 45.6886, 45.4524 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:38:02Z\",\n \"avg_ns\": 9321096820,\n \"stddev_ns\": 14093739,\n \"avg_ts\": 13.732310,\n \"stddev_ts\": 0.020776,\n \"samples_ns\": [ 9332357494, 9325640607, 9305292361 ],\n \"samples_ts\": [ 13.7157, 13.7256, 13.7556 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:37:51Z", - "avg_ns": 2804728981, - "stddev_ns": 10198291, - "avg_ts": 45.63761, - "stddev_ts": 0.165691, - "samples_ns": [ - 2796481980, - 2801572689, - 2816132274 - ], - "samples_ts": [ - 45.7718, - 45.6886, - 45.4524 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T03:38:02Z", - "avg_ns": 9321096820, - "stddev_ns": 14093739, - "avg_ts": 13.73231, - "stddev_ts": 0.020776, - "samples_ns": [ - 9332357494, - 9325640607, - 9305292361 - ], - "samples_ts": [ - 13.7157, - 13.7256, - 13.7556 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 336 - }, - { - "timestamp_utc": "2025-12-09T03:40:36.316751+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:38:31Z\",\n \"avg_ns\": 2776953585,\n \"stddev_ns\": 15259272,\n \"avg_ts\": 46.094603,\n \"stddev_ts\": 0.253066,\n \"samples_ns\": [ 2762593612, 2775291287, 2792975857 ],\n \"samples_ts\": [ 46.3333, 46.1213, 45.8293 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:38:42Z\",\n \"avg_ns\": 37805231390,\n \"stddev_ns\": 137230120,\n \"avg_ts\": 13.543218,\n \"stddev_ts\": 0.049105,\n \"samples_ns\": [ 37774997188, 37685639450, 37955057532 ],\n \"samples_ts\": [ 13.5539, 13.5861, 13.4896 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:38:31Z", - "avg_ns": 2776953585, - "stddev_ns": 15259272, - "avg_ts": 46.094603, - "stddev_ts": 0.253066, - "samples_ns": [ - 2762593612, - 2775291287, - 2792975857 - ], - "samples_ts": [ - 46.3333, - 46.1213, - 45.8293 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T03:38:42Z", - "avg_ns": 37805231390, - "stddev_ns": 137230120, - "avg_ts": 13.543218, - "stddev_ts": 0.049105, - "samples_ns": [ - 37774997188, - 37685639450, - 37955057532 - ], - "samples_ts": [ - 13.5539, - 13.5861, - 13.4896 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 337 - }, - { - "timestamp_utc": "2025-12-09T03:41:49.866354+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:40:36Z\",\n \"avg_ns\": 11186509804,\n \"stddev_ns\": 24323709,\n \"avg_ts\": 45.769559,\n \"stddev_ts\": 0.099613,\n \"samples_ns\": [ 11206044214, 11194219668, 11159265530 ],\n \"samples_ts\": [ 45.6896, 45.7379, 45.8812 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:41:21Z\",\n \"avg_ns\": 9309505210,\n \"stddev_ns\": 27013027,\n \"avg_ts\": 13.749465,\n \"stddev_ts\": 0.039941,\n \"samples_ns\": [ 9331818051, 9317224381, 9279473200 ],\n \"samples_ts\": [ 13.7165, 13.738, 13.7939 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:40:36Z", - "avg_ns": 11186509804, - "stddev_ns": 24323709, - "avg_ts": 45.769559, - "stddev_ts": 0.099613, - "samples_ns": [ - 11206044214, - 11194219668, - 11159265530 - ], - "samples_ts": [ - 45.6896, - 45.7379, - 45.8812 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T03:41:21Z", - "avg_ns": 9309505210, - "stddev_ns": 27013027, - "avg_ts": 13.749465, - "stddev_ts": 0.039941, - "samples_ns": [ - 9331818051, - 9317224381, - 9279473200 - ], - "samples_ts": [ - 13.7165, - 13.738, - 13.7939 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 338 - }, - { - "timestamp_utc": "2025-12-09T03:44:28.274313+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:41:50Z\",\n \"avg_ns\": 11139019930,\n \"stddev_ns\": 25247857,\n \"avg_ts\": 45.964704,\n \"stddev_ts\": 0.104282,\n \"samples_ns\": [ 11159395208, 11146889647, 11110774937 ],\n \"samples_ts\": [ 45.8806, 45.9321, 46.0814 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:42:35Z\",\n \"avg_ns\": 37646079658,\n \"stddev_ns\": 42542773,\n \"avg_ts\": 13.600365,\n \"stddev_ts\": 0.015375,\n \"samples_ns\": [ 37656880386, 37682180597, 37599177992 ],\n \"samples_ts\": [ 13.5965, 13.5873, 13.6173 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:41:50Z", - "avg_ns": 11139019930, - "stddev_ns": 25247857, - "avg_ts": 45.964704, - "stddev_ts": 0.104282, - "samples_ns": [ - 11159395208, - 11146889647, - 11110774937 - ], - "samples_ts": [ - 45.8806, - 45.9321, - 46.0814 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T03:42:35Z", - "avg_ns": 37646079658, - "stddev_ns": 42542773, - "avg_ts": 13.600365, - "stddev_ts": 0.015375, - "samples_ns": [ - 37656880386, - 37682180597, - 37599177992 - ], - "samples_ts": [ - 13.5965, - 13.5873, - 13.6173 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 339 - }, - { - "timestamp_utc": "2025-12-09T03:45:08.351784+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:44:28Z\",\n \"avg_ns\": 2806315962,\n \"stddev_ns\": 8380355,\n \"avg_ts\": 45.611672,\n \"stddev_ts\": 0.136442,\n \"samples_ns\": [ 2811434348, 2810868868, 2796644670 ],\n \"samples_ts\": [ 45.5284, 45.5375, 45.7691 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:44:40Z\",\n \"avg_ns\": 9348231872,\n \"stddev_ns\": 17626601,\n \"avg_ts\": 13.692461,\n \"stddev_ts\": 0.025803,\n \"samples_ns\": [ 9344455514, 9332799502, 9367440600 ],\n \"samples_ts\": [ 13.698, 13.7151, 13.6644 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:44:28Z", - "avg_ns": 2806315962, - "stddev_ns": 8380355, - "avg_ts": 45.611672, - "stddev_ts": 0.136442, - "samples_ns": [ - 2811434348, - 2810868868, - 2796644670 - ], - "samples_ts": [ - 45.5284, - 45.5375, - 45.7691 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T03:44:40Z", - "avg_ns": 9348231872, - "stddev_ns": 17626601, - "avg_ts": 13.692461, - "stddev_ts": 0.025803, - "samples_ns": [ - 9344455514, - 9332799502, - 9367440600 - ], - "samples_ts": [ - 13.698, - 13.7151, - 13.6644 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 340 - }, - { - "timestamp_utc": "2025-12-09T03:47:13.251819+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:45:09Z\",\n \"avg_ns\": 2786632456,\n \"stddev_ns\": 13957042,\n \"avg_ts\": 45.934347,\n \"stddev_ts\": 0.230330,\n \"samples_ns\": [ 2771712071, 2799368331, 2788816967 ],\n \"samples_ts\": [ 46.1808, 45.7246, 45.8976 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:45:20Z\",\n \"avg_ns\": 37638807028,\n \"stddev_ns\": 50169782,\n \"avg_ts\": 13.602998,\n \"stddev_ts\": 0.018120,\n \"samples_ns\": [ 37620754396, 37600161718, 37695504970 ],\n \"samples_ts\": [ 13.6095, 13.617, 13.5825 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:45:09Z", - "avg_ns": 2786632456, - "stddev_ns": 13957042, - "avg_ts": 45.934347, - "stddev_ts": 0.23033, - "samples_ns": [ - 2771712071, - 2799368331, - 2788816967 - ], - "samples_ts": [ - 46.1808, - 45.7246, - 45.8976 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T03:45:20Z", - "avg_ns": 37638807028, - "stddev_ns": 50169782, - "avg_ts": 13.602998, - "stddev_ts": 0.01812, - "samples_ns": [ - 37620754396, - 37600161718, - 37695504970 - ], - "samples_ts": [ - 13.6095, - 13.617, - 13.5825 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 341 - }, - { - "timestamp_utc": "2025-12-09T03:48:26.912696+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:47:13Z\",\n \"avg_ns\": 11220863168,\n \"stddev_ns\": 35372564,\n \"avg_ts\": 45.629590,\n \"stddev_ts\": 0.143585,\n \"samples_ns\": [ 11198313616, 11202644804, 11261631085 ],\n \"samples_ts\": [ 45.7212, 45.7035, 45.4641 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:47:58Z\",\n \"avg_ns\": 9315156611,\n \"stddev_ns\": 40311096,\n \"avg_ts\": 13.741218,\n \"stddev_ts\": 0.059321,\n \"samples_ns\": [ 9361535068, 9288538930, 9295395836 ],\n \"samples_ts\": [ 13.673, 13.7804, 13.7703 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:47:13Z", - "avg_ns": 11220863168, - "stddev_ns": 35372564, - "avg_ts": 45.62959, - "stddev_ts": 0.143585, - "samples_ns": [ - 11198313616, - 11202644804, - 11261631085 - ], - "samples_ts": [ - 45.7212, - 45.7035, - 45.4641 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T03:47:58Z", - "avg_ns": 9315156611, - "stddev_ns": 40311096, - "avg_ts": 13.741218, - "stddev_ts": 0.059321, - "samples_ns": [ - 9361535068, - 9288538930, - 9295395836 - ], - "samples_ts": [ - 13.673, - 13.7804, - 13.7703 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 342 - }, - { - "timestamp_utc": "2025-12-09T03:51:05.848967+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:48:27Z\",\n \"avg_ns\": 11238956769,\n \"stddev_ns\": 32095364,\n \"avg_ts\": 45.556077,\n \"stddev_ts\": 0.130085,\n \"samples_ns\": [ 11207187813, 11271368876, 11238313618 ],\n \"samples_ts\": [ 45.685, 45.4248, 45.5584 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:49:12Z\",\n \"avg_ns\": 37720570679,\n \"stddev_ns\": 126449920,\n \"avg_ts\": 13.573597,\n \"stddev_ts\": 0.045513,\n \"samples_ns\": [ 37591261495, 37726497972, 37843952571 ],\n \"samples_ts\": [ 13.6202, 13.5714, 13.5292 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:48:27Z", - "avg_ns": 11238956769, - "stddev_ns": 32095364, - "avg_ts": 45.556077, - "stddev_ts": 0.130085, - "samples_ns": [ - 11207187813, - 11271368876, - 11238313618 - ], - "samples_ts": [ - 45.685, - 45.4248, - 45.5584 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T03:49:12Z", - "avg_ns": 37720570679, - "stddev_ns": 126449920, - "avg_ts": 13.573597, - "stddev_ts": 0.045513, - "samples_ns": [ - 37591261495, - 37726497972, - 37843952571 - ], - "samples_ts": [ - 13.6202, - 13.5714, - 13.5292 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 343 - }, - { - "timestamp_utc": "2025-12-09T03:51:45.667251+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:51:06Z\",\n \"avg_ns\": 2782860760,\n \"stddev_ns\": 4787662,\n \"avg_ts\": 45.995924,\n \"stddev_ts\": 0.079191,\n \"samples_ns\": [ 2777415840, 2786408314, 2784758128 ],\n \"samples_ts\": [ 46.086, 45.9373, 45.9645 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:51:17Z\",\n \"avg_ns\": 9288892983,\n \"stddev_ns\": 29478133,\n \"avg_ts\": 13.779991,\n \"stddev_ts\": 0.043729,\n \"samples_ns\": [ 9288654605, 9259535078, 9318489268 ],\n \"samples_ts\": [ 13.7803, 13.8236, 13.7361 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:51:06Z", - "avg_ns": 2782860760, - "stddev_ns": 4787662, - "avg_ts": 45.995924, - "stddev_ts": 0.079191, - "samples_ns": [ - 2777415840, - 2786408314, - 2784758128 - ], - "samples_ts": [ - 46.086, - 45.9373, - 45.9645 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T03:51:17Z", - "avg_ns": 9288892983, - "stddev_ns": 29478133, - "avg_ts": 13.779991, - "stddev_ts": 0.043729, - "samples_ns": [ - 9288654605, - 9259535078, - 9318489268 - ], - "samples_ts": [ - 13.7803, - 13.8236, - 13.7361 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 344 - }, - { - "timestamp_utc": "2025-12-09T03:53:50.654685+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:51:46Z\",\n \"avg_ns\": 2796949233,\n \"stddev_ns\": 8633527,\n \"avg_ts\": 45.764439,\n \"stddev_ts\": 0.141324,\n \"samples_ns\": [ 2787949676, 2797735015, 2805163008 ],\n \"samples_ts\": [ 45.9119, 45.7513, 45.6301 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:51:57Z\",\n \"avg_ns\": 37655988530,\n \"stddev_ns\": 148470350,\n \"avg_ts\": 13.596916,\n \"stddev_ts\": 0.053490,\n \"samples_ns\": [ 37578317745, 37562465010, 37827182835 ],\n \"samples_ts\": [ 13.6249, 13.6306, 13.5352 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:51:46Z", - "avg_ns": 2796949233, - "stddev_ns": 8633527, - "avg_ts": 45.764439, - "stddev_ts": 0.141324, - "samples_ns": [ - 2787949676, - 2797735015, - 2805163008 - ], - "samples_ts": [ - 45.9119, - 45.7513, - 45.6301 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T03:51:57Z", - "avg_ns": 37655988530, - "stddev_ns": 148470350, - "avg_ts": 13.596916, - "stddev_ts": 0.05349, - "samples_ns": [ - 37578317745, - 37562465010, - 37827182835 - ], - "samples_ts": [ - 13.6249, - 13.6306, - 13.5352 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 345 - }, - { - "timestamp_utc": "2025-12-09T03:55:04.988983+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:53:51Z\",\n \"avg_ns\": 11351756344,\n \"stddev_ns\": 14319520,\n \"avg_ts\": 45.103201,\n \"stddev_ts\": 0.056856,\n \"samples_ns\": [ 11368126242, 11341557157, 11345585634 ],\n \"samples_ts\": [ 45.0382, 45.1437, 45.1277 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:54:36Z\",\n \"avg_ns\": 9352221114,\n \"stddev_ns\": 66148412,\n \"avg_ts\": 13.687046,\n \"stddev_ts\": 0.097147,\n \"samples_ns\": [ 9277126426, 9377677776, 9401859142 ],\n \"samples_ts\": [ 13.7974, 13.6494, 13.6143 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:53:51Z", - "avg_ns": 11351756344, - "stddev_ns": 14319520, - "avg_ts": 45.103201, - "stddev_ts": 0.056856, - "samples_ns": [ - 11368126242, - 11341557157, - 11345585634 - ], - "samples_ts": [ - 45.0382, - 45.1437, - 45.1277 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T03:54:36Z", - "avg_ns": 9352221114, - "stddev_ns": 66148412, - "avg_ts": 13.687046, - "stddev_ts": 0.097147, - "samples_ns": [ - 9277126426, - 9377677776, - 9401859142 - ], - "samples_ts": [ - 13.7974, - 13.6494, - 13.6143 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 346 - }, - { - "timestamp_utc": "2025-12-09T03:57:44.237924+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:55:05Z\",\n \"avg_ns\": 11370694207,\n \"stddev_ns\": 20750710,\n \"avg_ts\": 45.028134,\n \"stddev_ts\": 0.082202,\n \"samples_ns\": [ 11373592099, 11389843100, 11348647424 ],\n \"samples_ts\": [ 45.0166, 44.9523, 45.1155 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:55:51Z\",\n \"avg_ns\": 37623689643,\n \"stddev_ns\": 12656249,\n \"avg_ts\": 13.608448,\n \"stddev_ts\": 0.004577,\n \"samples_ns\": [ 37613152776, 37637727995, 37620188158 ],\n \"samples_ts\": [ 13.6123, 13.6034, 13.6097 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:55:05Z", - "avg_ns": 11370694207, - "stddev_ns": 20750710, - "avg_ts": 45.028134, - "stddev_ts": 0.082202, - "samples_ns": [ - 11373592099, - 11389843100, - 11348647424 - ], - "samples_ts": [ - 45.0166, - 44.9523, - 45.1155 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T03:55:51Z", - "avg_ns": 37623689643, - "stddev_ns": 12656249, - "avg_ts": 13.608448, - "stddev_ts": 0.004577, - "samples_ns": [ - 37613152776, - 37637727995, - 37620188158 - ], - "samples_ts": [ - 13.6123, - 13.6034, - 13.6097 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 347 - }, - { - "timestamp_utc": "2025-12-09T03:58:25.386828+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:57:44Z\",\n \"avg_ns\": 2788157990,\n \"stddev_ns\": 10999131,\n \"avg_ts\": 45.908921,\n \"stddev_ts\": 0.180816,\n \"samples_ns\": [ 2779147550, 2800414616, 2784911806 ],\n \"samples_ts\": [ 46.0573, 45.7075, 45.962 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:57:56Z\",\n \"avg_ns\": 9713854758,\n \"stddev_ns\": 36149058,\n \"avg_ts\": 13.177177,\n \"stddev_ts\": 0.049129,\n \"samples_ns\": [ 9740594525, 9672727293, 9728242457 ],\n \"samples_ts\": [ 13.1409, 13.2331, 13.1576 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:57:44Z", - "avg_ns": 2788157990, - "stddev_ns": 10999131, - "avg_ts": 45.908921, - "stddev_ts": 0.180816, - "samples_ns": [ - 2779147550, - 2800414616, - 2784911806 - ], - "samples_ts": [ - 46.0573, - 45.7075, - 45.962 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T03:57:56Z", - "avg_ns": 9713854758, - "stddev_ns": 36149058, - "avg_ts": 13.177177, - "stddev_ts": 0.049129, - "samples_ns": [ - 9740594525, - 9672727293, - 9728242457 - ], - "samples_ts": [ - 13.1409, - 13.2331, - 13.1576 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 348 - }, - { - "timestamp_utc": "2025-12-09T04:00:37.326183+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:58:26Z\",\n \"avg_ns\": 2854568308,\n \"stddev_ns\": 76421137,\n \"avg_ts\": 44.861529,\n \"stddev_ts\": 1.183562,\n \"samples_ns\": [ 2803079849, 2818248955, 2942376122 ],\n \"samples_ts\": [ 45.6641, 45.4183, 43.5023 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:58:37Z\",\n \"avg_ns\": 39909928769,\n \"stddev_ns\": 1168043127,\n \"avg_ts\": 12.836098,\n \"stddev_ts\": 0.369538,\n \"samples_ns\": [ 41256189290, 39166005112, 39307591905 ],\n \"samples_ts\": [ 12.4103, 13.0726, 13.0255 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T03:58:26Z", - "avg_ns": 2854568308, - "stddev_ns": 76421137, - "avg_ts": 44.861529, - "stddev_ts": 1.183562, - "samples_ns": [ - 2803079849, - 2818248955, - 2942376122 - ], - "samples_ts": [ - 45.6641, - 45.4183, - 43.5023 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T03:58:37Z", - "avg_ns": 39909928769, - "stddev_ns": 1168043127, - "avg_ts": 12.836098, - "stddev_ts": 0.369538, - "samples_ns": [ - 41256189290, - 39166005112, - 39307591905 - ], - "samples_ts": [ - 12.4103, - 13.0726, - 13.0255 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 349 - }, - { - "timestamp_utc": "2025-12-09T04:01:52.036689+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:00:37Z\",\n \"avg_ns\": 11277388922,\n \"stddev_ns\": 192486622,\n \"avg_ts\": 45.409314,\n \"stddev_ts\": 0.767583,\n \"samples_ns\": [ 11175863765, 11499384005, 11156918997 ],\n \"samples_ts\": [ 45.813, 44.5241, 45.8908 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:01:23Z\",\n \"avg_ns\": 9622818410,\n \"stddev_ns\": 28149953,\n \"avg_ts\": 13.301792,\n \"stddev_ts\": 0.038888,\n \"samples_ns\": [ 9618752791, 9596922502, 9652779938 ],\n \"samples_ts\": [ 13.3073, 13.3376, 13.2604 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:00:37Z", - "avg_ns": 11277388922, - "stddev_ns": 192486622, - "avg_ts": 45.409314, - "stddev_ts": 0.767583, - "samples_ns": [ - 11175863765, - 11499384005, - 11156918997 - ], - "samples_ts": [ - 45.813, - 44.5241, - 45.8908 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T04:01:23Z", - "avg_ns": 9622818410, - "stddev_ns": 28149953, - "avg_ts": 13.301792, - "stddev_ts": 0.038888, - "samples_ns": [ - 9618752791, - 9596922502, - 9652779938 - ], - "samples_ts": [ - 13.3073, - 13.3376, - 13.2604 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 350 - }, - { - "timestamp_utc": "2025-12-09T04:04:30.754010+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:01:52Z\",\n \"avg_ns\": 11206063171,\n \"stddev_ns\": 25674513,\n \"avg_ts\": 45.689711,\n \"stddev_ts\": 0.104627,\n \"samples_ns\": [ 11202199708, 11182539556, 11233450250 ],\n \"samples_ts\": [ 45.7053, 45.7857, 45.5782 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:02:37Z\",\n \"avg_ns\": 37668387010,\n \"stddev_ns\": 44287672,\n \"avg_ts\": 13.592312,\n \"stddev_ts\": 0.015972,\n \"samples_ns\": [ 37653256054, 37633648223, 37718256754 ],\n \"samples_ts\": [ 13.5978, 13.6048, 13.5743 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:01:52Z", - "avg_ns": 11206063171, - "stddev_ns": 25674513, - "avg_ts": 45.689711, - "stddev_ts": 0.104627, - "samples_ns": [ - 11202199708, - 11182539556, - 11233450250 - ], - "samples_ts": [ - 45.7053, - 45.7857, - 45.5782 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T04:02:37Z", - "avg_ns": 37668387010, - "stddev_ns": 44287672, - "avg_ts": 13.592312, - "stddev_ts": 0.015972, - "samples_ns": [ - 37653256054, - 37633648223, - 37718256754 - ], - "samples_ts": [ - 13.5978, - 13.6048, - 13.5743 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 351 - }, - { - "timestamp_utc": "2025-12-09T04:05:11.852412+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:04:31Z\",\n \"avg_ns\": 2780450151,\n \"stddev_ns\": 11516597,\n \"avg_ts\": 46.036238,\n \"stddev_ts\": 0.190874,\n \"samples_ns\": [ 2790893499, 2768098653, 2782358301 ],\n \"samples_ts\": [ 45.8634, 46.2411, 46.0041 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:04:42Z\",\n \"avg_ns\": 9711720792,\n \"stddev_ns\": 18724095,\n \"avg_ts\": 13.179983,\n \"stddev_ts\": 0.025384,\n \"samples_ns\": [ 9698703765, 9703279722, 9733178891 ],\n \"samples_ts\": [ 13.1976, 13.1914, 13.1509 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:04:31Z", - "avg_ns": 2780450151, - "stddev_ns": 11516597, - "avg_ts": 46.036238, - "stddev_ts": 0.190874, - "samples_ns": [ - 2790893499, - 2768098653, - 2782358301 - ], - "samples_ts": [ - 45.8634, - 46.2411, - 46.0041 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T04:04:42Z", - "avg_ns": 9711720792, - "stddev_ns": 18724095, - "avg_ts": 13.179983, - "stddev_ts": 0.025384, - "samples_ns": [ - 9698703765, - 9703279722, - 9733178891 - ], - "samples_ts": [ - 13.1976, - 13.1914, - 13.1509 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 352 - }, - { - "timestamp_utc": "2025-12-09T04:07:16.881570+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:05:12Z\",\n \"avg_ns\": 2802724955,\n \"stddev_ns\": 5604643,\n \"avg_ts\": 45.669962,\n \"stddev_ts\": 0.091305,\n \"samples_ns\": [ 2808545252, 2797364269, 2802265344 ],\n \"samples_ts\": [ 45.5752, 45.7574, 45.6773 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:05:23Z\",\n \"avg_ns\": 37651131292,\n \"stddev_ns\": 63514966,\n \"avg_ts\": 13.598555,\n \"stddev_ts\": 0.022946,\n \"samples_ns\": [ 37711168915, 37657591086, 37584633877 ],\n \"samples_ts\": [ 13.5769, 13.5962, 13.6226 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:05:12Z", - "avg_ns": 2802724955, - "stddev_ns": 5604643, - "avg_ts": 45.669962, - "stddev_ts": 0.091305, - "samples_ns": [ - 2808545252, - 2797364269, - 2802265344 - ], - "samples_ts": [ - 45.5752, - 45.7574, - 45.6773 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T04:05:23Z", - "avg_ns": 37651131292, - "stddev_ns": 63514966, - "avg_ts": 13.598555, - "stddev_ts": 0.022946, - "samples_ns": [ - 37711168915, - 37657591086, - 37584633877 - ], - "samples_ts": [ - 13.5769, - 13.5962, - 13.6226 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 353 - }, - { - "timestamp_utc": "2025-12-09T04:08:30.619629+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:07:17Z\",\n \"avg_ns\": 11202107563,\n \"stddev_ns\": 21990956,\n \"avg_ts\": 45.705802,\n \"stddev_ts\": 0.089745,\n \"samples_ns\": [ 11203798748, 11179320096, 11223203846 ],\n \"samples_ts\": [ 45.6988, 45.7988, 45.6198 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:08:02Z\",\n \"avg_ns\": 9363667767,\n \"stddev_ns\": 33840271,\n \"avg_ts\": 13.669976,\n \"stddev_ts\": 0.049326,\n \"samples_ns\": [ 9352849116, 9401594353, 9336559833 ],\n \"samples_ts\": [ 13.6857, 13.6147, 13.7095 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:07:17Z", - "avg_ns": 11202107563, - "stddev_ns": 21990956, - "avg_ts": 45.705802, - "stddev_ts": 0.089745, - "samples_ns": [ - 11203798748, - 11179320096, - 11223203846 - ], - "samples_ts": [ - 45.6988, - 45.7988, - 45.6198 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T04:08:02Z", - "avg_ns": 9363667767, - "stddev_ns": 33840271, - "avg_ts": 13.669976, - "stddev_ts": 0.049326, - "samples_ns": [ - 9352849116, - 9401594353, - 9336559833 - ], - "samples_ts": [ - 13.6857, - 13.6147, - 13.7095 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 354 - }, - { - "timestamp_utc": "2025-12-09T04:11:09.913788+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:08:31Z\",\n \"avg_ns\": 11149158239,\n \"stddev_ns\": 13689142,\n \"avg_ts\": 45.922796,\n \"stddev_ts\": 0.056344,\n \"samples_ns\": [ 11140658186, 11164949206, 11141867326 ],\n \"samples_ts\": [ 45.9578, 45.8578, 45.9528 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:09:15Z\",\n \"avg_ns\": 37932949685,\n \"stddev_ns\": 117859055,\n \"avg_ts\": 13.497587,\n \"stddev_ts\": 0.041954,\n \"samples_ns\": [ 37810391850, 38045467649, 37942989556 ],\n \"samples_ts\": [ 13.5413, 13.4576, 13.4939 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:08:31Z", - "avg_ns": 11149158239, - "stddev_ns": 13689142, - "avg_ts": 45.922796, - "stddev_ts": 0.056344, - "samples_ns": [ - 11140658186, - 11164949206, - 11141867326 - ], - "samples_ts": [ - 45.9578, - 45.8578, - 45.9528 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T04:09:15Z", - "avg_ns": 37932949685, - "stddev_ns": 117859055, - "avg_ts": 13.497587, - "stddev_ts": 0.041954, - "samples_ns": [ - 37810391850, - 38045467649, - 37942989556 - ], - "samples_ts": [ - 13.5413, - 13.4576, - 13.4939 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 355 - }, - { - "timestamp_utc": "2025-12-09T04:11:51.129687+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:11:10Z\",\n \"avg_ns\": 2795437242,\n \"stddev_ns\": 11044711,\n \"avg_ts\": 45.789378,\n \"stddev_ts\": 0.180914,\n \"samples_ns\": [ 2784381181, 2806470316, 2795460230 ],\n \"samples_ts\": [ 45.9707, 45.6089, 45.7885 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:11:21Z\",\n \"avg_ns\": 9732318384,\n \"stddev_ns\": 26720494,\n \"avg_ts\": 13.152122,\n \"stddev_ts\": 0.036058,\n \"samples_ns\": [ 9713158562, 9720954066, 9762842524 ],\n \"samples_ts\": [ 13.178, 13.1674, 13.1109 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:11:10Z", - "avg_ns": 2795437242, - "stddev_ns": 11044711, - "avg_ts": 45.789378, - "stddev_ts": 0.180914, - "samples_ns": [ - 2784381181, - 2806470316, - 2795460230 - ], - "samples_ts": [ - 45.9707, - 45.6089, - 45.7885 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T04:11:21Z", - "avg_ns": 9732318384, - "stddev_ns": 26720494, - "avg_ts": 13.152122, - "stddev_ts": 0.036058, - "samples_ns": [ - 9713158562, - 9720954066, - 9762842524 - ], - "samples_ts": [ - 13.178, - 13.1674, - 13.1109 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 356 - }, - { - "timestamp_utc": "2025-12-09T04:14:00.254563+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:11:51Z\",\n \"avg_ns\": 2782211199,\n \"stddev_ns\": 15364924,\n \"avg_ts\": 46.007505,\n \"stddev_ts\": 0.253385,\n \"samples_ns\": [ 2799667764, 2776226890, 2770738944 ],\n \"samples_ts\": [ 45.7197, 46.1057, 46.1971 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:12:02Z\",\n \"avg_ns\": 39049741785,\n \"stddev_ns\": 102846451,\n \"avg_ts\": 13.111543,\n \"stddev_ts\": 0.034524,\n \"samples_ns\": [ 38950246092, 39155639163, 39043340102 ],\n \"samples_ts\": [ 13.145, 13.076, 13.1136 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:11:51Z", - "avg_ns": 2782211199, - "stddev_ns": 15364924, - "avg_ts": 46.007505, - "stddev_ts": 0.253385, - "samples_ns": [ - 2799667764, - 2776226890, - 2770738944 - ], - "samples_ts": [ - 45.7197, - 46.1057, - 46.1971 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T04:12:02Z", - "avg_ns": 39049741785, - "stddev_ns": 102846451, - "avg_ts": 13.111543, - "stddev_ts": 0.034524, - "samples_ns": [ - 38950246092, - 39155639163, - 39043340102 - ], - "samples_ts": [ - 13.145, - 13.076, - 13.1136 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 357 - }, - { - "timestamp_utc": "2025-12-09T04:15:14.500034+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:14:00Z\",\n \"avg_ns\": 11335750956,\n \"stddev_ns\": 11106854,\n \"avg_ts\": 45.166865,\n \"stddev_ts\": 0.044254,\n \"samples_ns\": [ 11346569431, 11336304940, 11324378499 ],\n \"samples_ts\": [ 45.1238, 45.1646, 45.2122 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:14:46Z\",\n \"avg_ns\": 9343551297,\n \"stddev_ns\": 62742953,\n \"avg_ts\": 13.699699,\n \"stddev_ts\": 0.091833,\n \"samples_ns\": [ 9332063733, 9411244218, 9287345941 ],\n \"samples_ts\": [ 13.7162, 13.6008, 13.7822 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:14:00Z", - "avg_ns": 11335750956, - "stddev_ns": 11106854, - "avg_ts": 45.166865, - "stddev_ts": 0.044254, - "samples_ns": [ - 11346569431, - 11336304940, - 11324378499 - ], - "samples_ts": [ - 45.1238, - 45.1646, - 45.2122 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T04:14:46Z", - "avg_ns": 9343551297, - "stddev_ns": 62742953, - "avg_ts": 13.699699, - "stddev_ts": 0.091833, - "samples_ns": [ - 9332063733, - 9411244218, - 9287345941 - ], - "samples_ts": [ - 13.7162, - 13.6008, - 13.7822 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 358 - }, - { - "timestamp_utc": "2025-12-09T04:17:54.172075+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:15:15Z\",\n \"avg_ns\": 11390388225,\n \"stddev_ns\": 12366140,\n \"avg_ts\": 44.950215,\n \"stddev_ts\": 0.048781,\n \"samples_ns\": [ 11387385452, 11379800462, 11403978762 ],\n \"samples_ts\": [ 44.962, 44.992, 44.8966 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:16:00Z\",\n \"avg_ns\": 37747980697,\n \"stddev_ns\": 6311260,\n \"avg_ts\": 13.563640,\n \"stddev_ts\": 0.002268,\n \"samples_ns\": [ 37755148514, 37745536441, 37743257136 ],\n \"samples_ts\": [ 13.5611, 13.5645, 13.5653 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:15:15Z", - "avg_ns": 11390388225, - "stddev_ns": 12366140, - "avg_ts": 44.950215, - "stddev_ts": 0.048781, - "samples_ns": [ - 11387385452, - 11379800462, - 11403978762 - ], - "samples_ts": [ - 44.962, - 44.992, - 44.8966 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_type": "gemma3 1B Q8_0", - "model_size": 1062773248, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T04:16:00Z", - "avg_ns": 37747980697, - "stddev_ns": 6311260, - "avg_ts": 13.56364, - "stddev_ts": 0.002268, - "samples_ns": [ - 37755148514, - 37745536441, - 37743257136 - ], - "samples_ts": [ - 13.5611, - 13.5645, - 13.5653 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 359 - }, - { - "timestamp_utc": "2025-12-09T04:19:03.701485+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:17:59Z\",\n \"avg_ns\": 5620038820,\n \"stddev_ns\": 7053294,\n \"avg_ts\": 22.775667,\n \"stddev_ts\": 0.028597,\n \"samples_ns\": [ 5622213355, 5612154723, 5625748383 ],\n \"samples_ts\": [ 22.7668, 22.8076, 22.7525 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:18:21Z\",\n \"avg_ns\": 13883052206,\n \"stddev_ns\": 48763607,\n \"avg_ts\": 9.219951,\n \"stddev_ts\": 0.032433,\n \"samples_ns\": [ 13898221417, 13922428042, 13828507161 ],\n \"samples_ts\": [ 9.20981, 9.1938, 9.25624 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:17:59Z", - "avg_ns": 5620038820, - "stddev_ns": 7053294, - "avg_ts": 22.775667, - "stddev_ts": 0.028597, - "samples_ns": [ - 5622213355, - 5612154723, - 5625748383 - ], - "samples_ts": [ - 22.7668, - 22.8076, - 22.7525 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T04:18:21Z", - "avg_ns": 13883052206, - "stddev_ns": 48763607, - "avg_ts": 9.219951, - "stddev_ts": 0.032433, - "samples_ns": [ - 13898221417, - 13922428042, - 13828507161 - ], - "samples_ts": [ - 9.20981, - 9.1938, - 9.25624 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 360 - }, - { - "timestamp_utc": "2025-12-09T04:22:10.719201+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:19:04Z\",\n \"avg_ns\": 5612960025,\n \"stddev_ns\": 7023977,\n \"avg_ts\": 22.804391,\n \"stddev_ts\": 0.028529,\n \"samples_ns\": [ 5620481007, 5606570394, 5611828674 ],\n \"samples_ts\": [ 22.7739, 22.8304, 22.809 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:19:27Z\",\n \"avg_ns\": 54476033950,\n \"stddev_ns\": 134478161,\n \"avg_ts\": 9.398667,\n \"stddev_ts\": 0.023206,\n \"samples_ns\": [ 54337816040, 54483854790, 54606431020 ],\n \"samples_ts\": [ 9.42254, 9.39728, 9.37619 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:19:04Z", - "avg_ns": 5612960025, - "stddev_ns": 7023977, - "avg_ts": 22.804391, - "stddev_ts": 0.028529, - "samples_ns": [ - 5620481007, - 5606570394, - 5611828674 - ], - "samples_ts": [ - 22.7739, - 22.8304, - 22.809 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T04:19:27Z", - "avg_ns": 54476033950, - "stddev_ns": 134478161, - "avg_ts": 9.398667, - "stddev_ts": 0.023206, - "samples_ns": [ - 54337816040, - 54483854790, - 54606431020 - ], - "samples_ts": [ - 9.42254, - 9.39728, - 9.37619 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 361 - }, - { - "timestamp_utc": "2025-12-09T04:24:22.584061+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:22:11Z\",\n \"avg_ns\": 22484501764,\n \"stddev_ns\": 19893499,\n \"avg_ts\": 22.771253,\n \"stddev_ts\": 0.020152,\n \"samples_ns\": [ 22462983936, 22502224056, 22488297300 ],\n \"samples_ts\": [ 22.7931, 22.7533, 22.7674 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:23:41Z\",\n \"avg_ns\": 13618249198,\n \"stddev_ns\": 41051613,\n \"avg_ts\": 9.399209,\n \"stddev_ts\": 0.028366,\n \"samples_ns\": [ 13572732440, 13629545630, 13652469526 ],\n \"samples_ts\": [ 9.43067, 9.39136, 9.37559 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:22:11Z", - "avg_ns": 22484501764, - "stddev_ns": 19893499, - "avg_ts": 22.771253, - "stddev_ts": 0.020152, - "samples_ns": [ - 22462983936, - 22502224056, - 22488297300 - ], - "samples_ts": [ - 22.7931, - 22.7533, - 22.7674 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T04:23:41Z", - "avg_ns": 13618249198, - "stddev_ns": 41051613, - "avg_ts": 9.399209, - "stddev_ts": 0.028366, - "samples_ns": [ - 13572732440, - 13629545630, - 13652469526 - ], - "samples_ts": [ - 9.43067, - 9.39136, - 9.37559 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 362 - }, - { - "timestamp_utc": "2025-12-09T04:28:36.947715+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:24:23Z\",\n \"avg_ns\": 22579471804,\n \"stddev_ns\": 10451059,\n \"avg_ts\": 22.675467,\n \"stddev_ts\": 0.010496,\n \"samples_ns\": [ 22567693838, 22583093827, 22587627749 ],\n \"samples_ts\": [ 22.6873, 22.6718, 22.6673 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:25:53Z\",\n \"avg_ns\": 54327406817,\n \"stddev_ns\": 107354468,\n \"avg_ts\": 9.424365,\n \"stddev_ts\": 0.018603,\n \"samples_ns\": [ 54450436814, 54279031169, 54252752469 ],\n \"samples_ts\": [ 9.40305, 9.43274, 9.43731 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:24:23Z", - "avg_ns": 22579471804, - "stddev_ns": 10451059, - "avg_ts": 22.675467, - "stddev_ts": 0.010496, - "samples_ns": [ - 22567693838, - 22583093827, - 22587627749 - ], - "samples_ts": [ - 22.6873, - 22.6718, - 22.6673 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T04:25:53Z", - "avg_ns": 54327406817, - "stddev_ns": 107354468, - "avg_ts": 9.424365, - "stddev_ts": 0.018603, - "samples_ns": [ - 54450436814, - 54279031169, - 54252752469 - ], - "samples_ts": [ - 9.40305, - 9.43274, - 9.43731 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 363 - }, - { - "timestamp_utc": "2025-12-09T04:29:42.391174+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:28:37Z\",\n \"avg_ns\": 5612398317,\n \"stddev_ns\": 3376744,\n \"avg_ts\": 22.806655,\n \"stddev_ts\": 0.013726,\n \"samples_ns\": [ 5613937767, 5608526178, 5614731006 ],\n \"samples_ts\": [ 22.8004, 22.8224, 22.7972 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:29:00Z\",\n \"avg_ns\": 13965044801,\n \"stddev_ns\": 12348214,\n \"avg_ts\": 9.165747,\n \"stddev_ts\": 0.008101,\n \"samples_ns\": [ 13979106491, 13960058474, 13955969438 ],\n \"samples_ts\": [ 9.15652, 9.16902, 9.1717 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:28:37Z", - "avg_ns": 5612398317, - "stddev_ns": 3376744, - "avg_ts": 22.806655, - "stddev_ts": 0.013726, - "samples_ns": [ - 5613937767, - 5608526178, - 5614731006 - ], - "samples_ts": [ - 22.8004, - 22.8224, - 22.7972 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T04:29:00Z", - "avg_ns": 13965044801, - "stddev_ns": 12348214, - "avg_ts": 9.165747, - "stddev_ts": 0.008101, - "samples_ns": [ - 13979106491, - 13960058474, - 13955969438 - ], - "samples_ts": [ - 9.15652, - 9.16902, - 9.1717 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 364 - }, - { - "timestamp_utc": "2025-12-09T04:32:49.353814+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:29:43Z\",\n \"avg_ns\": 5615878878,\n \"stddev_ns\": 9838762,\n \"avg_ts\": 22.792561,\n \"stddev_ts\": 0.039964,\n \"samples_ns\": [ 5604790110, 5623563510, 5619283014 ],\n \"samples_ts\": [ 22.8376, 22.7614, 22.7787 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:30:05Z\",\n \"avg_ns\": 54473749387,\n \"stddev_ns\": 126985701,\n \"avg_ts\": 9.399057,\n \"stddev_ts\": 0.021910,\n \"samples_ns\": [ 54601219102, 54472775762, 54347253297 ],\n \"samples_ts\": [ 9.37708, 9.39919, 9.4209 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:29:43Z", - "avg_ns": 5615878878, - "stddev_ns": 9838762, - "avg_ts": 22.792561, - "stddev_ts": 0.039964, - "samples_ns": [ - 5604790110, - 5623563510, - 5619283014 - ], - "samples_ts": [ - 22.8376, - 22.7614, - 22.7787 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T04:30:05Z", - "avg_ns": 54473749387, - "stddev_ns": 126985701, - "avg_ts": 9.399057, - "stddev_ts": 0.02191, - "samples_ns": [ - 54601219102, - 54472775762, - 54347253297 - ], - "samples_ts": [ - 9.37708, - 9.39919, - 9.4209 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 365 - }, - { - "timestamp_utc": "2025-12-09T04:35:01.461468+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:32:50Z\",\n \"avg_ns\": 22573556736,\n \"stddev_ns\": 28053737,\n \"avg_ts\": 22.681429,\n \"stddev_ts\": 0.028191,\n \"samples_ns\": [ 22544768397, 22600813092, 22575088719 ],\n \"samples_ts\": [ 22.7104, 22.6541, 22.6799 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:34:20Z\",\n \"avg_ns\": 13579916807,\n \"stddev_ns\": 36104321,\n \"avg_ts\": 9.425728,\n \"stddev_ts\": 0.025026,\n \"samples_ns\": [ 13565650054, 13620974412, 13553125956 ],\n \"samples_ts\": [ 9.4356, 9.39727, 9.44432 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:32:50Z", - "avg_ns": 22573556736, - "stddev_ns": 28053737, - "avg_ts": 22.681429, - "stddev_ts": 0.028191, - "samples_ns": [ - 22544768397, - 22600813092, - 22575088719 - ], - "samples_ts": [ - 22.7104, - 22.6541, - 22.6799 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T04:34:20Z", - "avg_ns": 13579916807, - "stddev_ns": 36104321, - "avg_ts": 9.425728, - "stddev_ts": 0.025026, - "samples_ns": [ - 13565650054, - 13620974412, - 13553125956 - ], - "samples_ts": [ - 9.4356, - 9.39727, - 9.44432 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 366 - }, - { - "timestamp_utc": "2025-12-09T04:39:15.839018+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:35:02Z\",\n \"avg_ns\": 22587961406,\n \"stddev_ns\": 13590545,\n \"avg_ts\": 22.666947,\n \"stddev_ts\": 0.013637,\n \"samples_ns\": [ 22588153680, 22601453133, 22574277407 ],\n \"samples_ts\": [ 22.6667, 22.6534, 22.6807 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:36:32Z\",\n \"avg_ns\": 54298618190,\n \"stddev_ns\": 126341868,\n \"avg_ts\": 9.429372,\n \"stddev_ts\": 0.021924,\n \"samples_ns\": [ 54270173440, 54436757196, 54188923936 ],\n \"samples_ts\": [ 9.43428, 9.40541, 9.44843 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:35:02Z", - "avg_ns": 22587961406, - "stddev_ns": 13590545, - "avg_ts": 22.666947, - "stddev_ts": 0.013637, - "samples_ns": [ - 22588153680, - 22601453133, - 22574277407 - ], - "samples_ts": [ - 22.6667, - 22.6534, - 22.6807 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T04:36:32Z", - "avg_ns": 54298618190, - "stddev_ns": 126341868, - "avg_ts": 9.429372, - "stddev_ts": 0.021924, - "samples_ns": [ - 54270173440, - 54436757196, - 54188923936 - ], - "samples_ts": [ - 9.43428, - 9.40541, - 9.44843 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 367 - }, - { - "timestamp_utc": "2025-12-09T04:40:21.207996+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:39:16Z\",\n \"avg_ns\": 5604800628,\n \"stddev_ns\": 6029171,\n \"avg_ts\": 22.837583,\n \"stddev_ts\": 0.024574,\n \"samples_ns\": [ 5609633304, 5606722912, 5598045670 ],\n \"samples_ts\": [ 22.8179, 22.8297, 22.8651 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:39:39Z\",\n \"avg_ns\": 13963123462,\n \"stddev_ns\": 21187047,\n \"avg_ts\": 9.167017,\n \"stddev_ts\": 0.013903,\n \"samples_ns\": [ 13959101322, 13986032615, 13944236451 ],\n \"samples_ts\": [ 9.16964, 9.15199, 9.17942 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:39:16Z", - "avg_ns": 5604800628, - "stddev_ns": 6029171, - "avg_ts": 22.837583, - "stddev_ts": 0.024574, - "samples_ns": [ - 5609633304, - 5606722912, - 5598045670 - ], - "samples_ts": [ - 22.8179, - 22.8297, - 22.8651 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T04:39:39Z", - "avg_ns": 13963123462, - "stddev_ns": 21187047, - "avg_ts": 9.167017, - "stddev_ts": 0.013903, - "samples_ns": [ - 13959101322, - 13986032615, - 13944236451 - ], - "samples_ts": [ - 9.16964, - 9.15199, - 9.17942 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 368 - }, - { - "timestamp_utc": "2025-12-09T04:43:28.932900+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:40:22Z\",\n \"avg_ns\": 5699824163,\n \"stddev_ns\": 12953737,\n \"avg_ts\": 22.456910,\n \"stddev_ts\": 0.051045,\n \"samples_ns\": [ 5686521279, 5712396995, 5700554217 ],\n \"samples_ts\": [ 22.5094, 22.4074, 22.454 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:40:44Z\",\n \"avg_ns\": 54615790298,\n \"stddev_ns\": 83163750,\n \"avg_ts\": 9.374593,\n \"stddev_ts\": 0.014286,\n \"samples_ns\": [ 54521181744, 54677342951, 54648846201 ],\n \"samples_ts\": [ 9.39085, 9.36402, 9.36891 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:40:22Z", - "avg_ns": 5699824163, - "stddev_ns": 12953737, - "avg_ts": 22.45691, - "stddev_ts": 0.051045, - "samples_ns": [ - 5686521279, - 5712396995, - 5700554217 - ], - "samples_ts": [ - 22.5094, - 22.4074, - 22.454 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T04:40:44Z", - "avg_ns": 54615790298, - "stddev_ns": 83163750, - "avg_ts": 9.374593, - "stddev_ts": 0.014286, - "samples_ns": [ - 54521181744, - 54677342951, - 54648846201 - ], - "samples_ts": [ - 9.39085, - 9.36402, - 9.36891 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 369 - }, - { - "timestamp_utc": "2025-12-09T04:45:42.371944+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:43:29Z\",\n \"avg_ns\": 23035532254,\n \"stddev_ns\": 44057935,\n \"avg_ts\": 22.226586,\n \"stddev_ts\": 0.042514,\n \"samples_ns\": [ 23036712454, 22990896338, 23078987971 ],\n \"samples_ts\": [ 22.2254, 22.2697, 22.1847 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:45:02Z\",\n \"avg_ns\": 13390334336,\n \"stddev_ns\": 19142006,\n \"avg_ts\": 9.559147,\n \"stddev_ts\": 0.013654,\n \"samples_ns\": [ 13376764616, 13382010039, 13412228355 ],\n \"samples_ts\": [ 9.56883, 9.56508, 9.54353 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:43:29Z", - "avg_ns": 23035532254, - "stddev_ns": 44057935, - "avg_ts": 22.226586, - "stddev_ts": 0.042514, - "samples_ns": [ - 23036712454, - 22990896338, - 23078987971 - ], - "samples_ts": [ - 22.2254, - 22.2697, - 22.1847 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T04:45:02Z", - "avg_ns": 13390334336, - "stddev_ns": 19142006, - "avg_ts": 9.559147, - "stddev_ts": 0.013654, - "samples_ns": [ - 13376764616, - 13382010039, - 13412228355 - ], - "samples_ts": [ - 9.56883, - 9.56508, - 9.54353 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 370 - }, - { - "timestamp_utc": "2025-12-09T04:49:59.696424+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:45:43Z\",\n \"avg_ns\": 22859834330,\n \"stddev_ns\": 30936418,\n \"avg_ts\": 22.397390,\n \"stddev_ts\": 0.030296,\n \"samples_ns\": [ 22852151202, 22833463872, 22893887917 ],\n \"samples_ts\": [ 22.4049, 22.4232, 22.364 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:47:14Z\",\n \"avg_ns\": 54927665300,\n \"stddev_ns\": 51421587,\n \"avg_ts\": 9.321356,\n \"stddev_ts\": 0.008729,\n \"samples_ns\": [ 54942802295, 54870375346, 54969818261 ],\n \"samples_ts\": [ 9.31878, 9.33108, 9.3142 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:45:43Z", - "avg_ns": 22859834330, - "stddev_ns": 30936418, - "avg_ts": 22.39739, - "stddev_ts": 0.030296, - "samples_ns": [ - 22852151202, - 22833463872, - 22893887917 - ], - "samples_ts": [ - 22.4049, - 22.4232, - 22.364 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T04:47:14Z", - "avg_ns": 54927665300, - "stddev_ns": 51421587, - "avg_ts": 9.321356, - "stddev_ts": 0.008729, - "samples_ns": [ - 54942802295, - 54870375346, - 54969818261 - ], - "samples_ts": [ - 9.31878, - 9.33108, - 9.3142 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 371 - }, - { - "timestamp_utc": "2025-12-09T04:51:04.854904+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:50:00Z\",\n \"avg_ns\": 5608969417,\n \"stddev_ns\": 18553394,\n \"avg_ts\": 22.820758,\n \"stddev_ts\": 0.075387,\n \"samples_ns\": [ 5593715494, 5603568837, 5629623920 ],\n \"samples_ts\": [ 22.8828, 22.8426, 22.7369 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:50:23Z\",\n \"avg_ns\": 13869689791,\n \"stddev_ns\": 49351106,\n \"avg_ts\": 9.228835,\n \"stddev_ts\": 0.032903,\n \"samples_ns\": [ 13812923071, 13893753598, 13902392706 ],\n \"samples_ts\": [ 9.26668, 9.21277, 9.20705 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:50:00Z", - "avg_ns": 5608969417, - "stddev_ns": 18553394, - "avg_ts": 22.820758, - "stddev_ts": 0.075387, - "samples_ns": [ - 5593715494, - 5603568837, - 5629623920 - ], - "samples_ts": [ - 22.8828, - 22.8426, - 22.7369 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T04:50:23Z", - "avg_ns": 13869689791, - "stddev_ns": 49351106, - "avg_ts": 9.228835, - "stddev_ts": 0.032903, - "samples_ns": [ - 13812923071, - 13893753598, - 13902392706 - ], - "samples_ts": [ - 9.26668, - 9.21277, - 9.20705 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 372 - }, - { - "timestamp_utc": "2025-12-09T04:54:11.562257+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:51:05Z\",\n \"avg_ns\": 5637772986,\n \"stddev_ns\": 5560909,\n \"avg_ts\": 22.704015,\n \"stddev_ts\": 0.022380,\n \"samples_ns\": [ 5634660974, 5634465392, 5644192593 ],\n \"samples_ts\": [ 22.7165, 22.7173, 22.6782 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:51:28Z\",\n \"avg_ns\": 54347639018,\n \"stddev_ns\": 53579687,\n \"avg_ts\": 9.420839,\n \"stddev_ts\": 0.009287,\n \"samples_ns\": [ 54402293291, 54345419878, 54295203886 ],\n \"samples_ts\": [ 9.41137, 9.42122, 9.42993 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:51:05Z", - "avg_ns": 5637772986, - "stddev_ns": 5560909, - "avg_ts": 22.704015, - "stddev_ts": 0.02238, - "samples_ns": [ - 5634660974, - 5634465392, - 5644192593 - ], - "samples_ts": [ - 22.7165, - 22.7173, - 22.6782 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T04:51:28Z", - "avg_ns": 54347639018, - "stddev_ns": 53579687, - "avg_ts": 9.420839, - "stddev_ts": 0.009287, - "samples_ns": [ - 54402293291, - 54345419878, - 54295203886 - ], - "samples_ts": [ - 9.41137, - 9.42122, - 9.42993 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 373 - }, - { - "timestamp_utc": "2025-12-09T04:56:24.583000+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:54:12Z\",\n \"avg_ns\": 22614577340,\n \"stddev_ns\": 27168416,\n \"avg_ts\": 22.640286,\n \"stddev_ts\": 0.027204,\n \"samples_ns\": [ 22617782751, 22640000024, 22585949247 ],\n \"samples_ts\": [ 22.6371, 22.6148, 22.669 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:55:42Z\",\n \"avg_ns\": 13824078683,\n \"stddev_ns\": 21769841,\n \"avg_ts\": 9.259222,\n \"stddev_ts\": 0.014583,\n \"samples_ns\": [ 13845163754, 13801684431, 13825387866 ],\n \"samples_ts\": [ 9.24511, 9.27423, 9.25833 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:54:12Z", - "avg_ns": 22614577340, - "stddev_ns": 27168416, - "avg_ts": 22.640286, - "stddev_ts": 0.027204, - "samples_ns": [ - 22617782751, - 22640000024, - 22585949247 - ], - "samples_ts": [ - 22.6371, - 22.6148, - 22.669 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T04:55:42Z", - "avg_ns": 13824078683, - "stddev_ns": 21769841, - "avg_ts": 9.259222, - "stddev_ts": 0.014583, - "samples_ns": [ - 13845163754, - 13801684431, - 13825387866 - ], - "samples_ts": [ - 9.24511, - 9.27423, - 9.25833 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 374 - }, - { - "timestamp_utc": "2025-12-09T05:00:39.777249+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:56:25Z\",\n \"avg_ns\": 22824641041,\n \"stddev_ns\": 39733803,\n \"avg_ts\": 22.431942,\n \"stddev_ts\": 0.039013,\n \"samples_ns\": [ 22870163628, 22806829502, 22796929995 ],\n \"samples_ts\": [ 22.3872, 22.4494, 22.4592 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:57:56Z\",\n \"avg_ns\": 54241900217,\n \"stddev_ns\": 73306205,\n \"avg_ts\": 9.439209,\n \"stddev_ts\": 0.012750,\n \"samples_ns\": [ 54323350409, 54181222904, 54221127340 ],\n \"samples_ts\": [ 9.42504, 9.44977, 9.44281 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T04:56:25Z", - "avg_ns": 22824641041, - "stddev_ns": 39733803, - "avg_ts": 22.431942, - "stddev_ts": 0.039013, - "samples_ns": [ - 22870163628, - 22806829502, - 22796929995 - ], - "samples_ts": [ - 22.3872, - 22.4494, - 22.4592 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T04:57:56Z", - "avg_ns": 54241900217, - "stddev_ns": 73306205, - "avg_ts": 9.439209, - "stddev_ts": 0.01275, - "samples_ns": [ - 54323350409, - 54181222904, - 54221127340 - ], - "samples_ts": [ - 9.42504, - 9.44977, - 9.44281 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 375 - }, - { - "timestamp_utc": "2025-12-09T05:01:44.616423+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:00:40Z\",\n \"avg_ns\": 5639868728,\n \"stddev_ns\": 5905466,\n \"avg_ts\": 22.695580,\n \"stddev_ts\": 0.023765,\n \"samples_ns\": [ 5645410664, 5640537015, 5633658507 ],\n \"samples_ts\": [ 22.6733, 22.6929, 22.7206 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:01:03Z\",\n \"avg_ns\": 13732436684,\n \"stddev_ns\": 21497734,\n \"avg_ts\": 9.321012,\n \"stddev_ts\": 0.014579,\n \"samples_ns\": [ 13717461782, 13722779477, 13757068795 ],\n \"samples_ts\": [ 9.33117, 9.32756, 9.30431 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:00:40Z", - "avg_ns": 5639868728, - "stddev_ns": 5905466, - "avg_ts": 22.69558, - "stddev_ts": 0.023765, - "samples_ns": [ - 5645410664, - 5640537015, - 5633658507 - ], - "samples_ts": [ - 22.6733, - 22.6929, - 22.7206 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T05:01:03Z", - "avg_ns": 13732436684, - "stddev_ns": 21497734, - "avg_ts": 9.321012, - "stddev_ts": 0.014579, - "samples_ns": [ - 13717461782, - 13722779477, - 13757068795 - ], - "samples_ts": [ - 9.33117, - 9.32756, - 9.30431 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 376 - }, - { - "timestamp_utc": "2025-12-09T05:04:50.737790+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:01:45Z\",\n \"avg_ns\": 5628211366,\n \"stddev_ns\": 11015177,\n \"avg_ts\": 22.742630,\n \"stddev_ts\": 0.044557,\n \"samples_ns\": [ 5633274558, 5635784303, 5615575238 ],\n \"samples_ts\": [ 22.7221, 22.712, 22.7937 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:02:07Z\",\n \"avg_ns\": 54182194902,\n \"stddev_ns\": 58211715,\n \"avg_ts\": 9.449606,\n \"stddev_ts\": 0.010147,\n \"samples_ns\": [ 54137214565, 54247940966, 54161429177 ],\n \"samples_ts\": [ 9.45745, 9.43815, 9.45322 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:01:45Z", - "avg_ns": 5628211366, - "stddev_ns": 11015177, - "avg_ts": 22.74263, - "stddev_ts": 0.044557, - "samples_ns": [ - 5633274558, - 5635784303, - 5615575238 - ], - "samples_ts": [ - 22.7221, - 22.712, - 22.7937 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T05:02:07Z", - "avg_ns": 54182194902, - "stddev_ns": 58211715, - "avg_ts": 9.449606, - "stddev_ts": 0.010147, - "samples_ns": [ - 54137214565, - 54247940966, - 54161429177 - ], - "samples_ts": [ - 9.45745, - 9.43815, - 9.45322 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 377 - }, - { - "timestamp_utc": "2025-12-09T05:07:03.130738+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:04:51Z\",\n \"avg_ns\": 22770685984,\n \"stddev_ns\": 33196879,\n \"avg_ts\": 22.485081,\n \"stddev_ts\": 0.032803,\n \"samples_ns\": [ 22733113990, 22782892333, 22796051629 ],\n \"samples_ts\": [ 22.5222, 22.473, 22.46 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:06:22Z\",\n \"avg_ns\": 13396621658,\n \"stddev_ns\": 44459078,\n \"avg_ts\": 9.554718,\n \"stddev_ts\": 0.031703,\n \"samples_ns\": [ 13394921128, 13441906303, 13353037545 ],\n \"samples_ts\": [ 9.55586, 9.52246, 9.58583 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:04:51Z", - "avg_ns": 22770685984, - "stddev_ns": 33196879, - "avg_ts": 22.485081, - "stddev_ts": 0.032803, - "samples_ns": [ - 22733113990, - 22782892333, - 22796051629 - ], - "samples_ts": [ - 22.5222, - 22.473, - 22.46 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T05:06:22Z", - "avg_ns": 13396621658, - "stddev_ns": 44459078, - "avg_ts": 9.554718, - "stddev_ts": 0.031703, - "samples_ns": [ - 13394921128, - 13441906303, - 13353037545 - ], - "samples_ts": [ - 9.55586, - 9.52246, - 9.58583 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 378 - }, - { - "timestamp_utc": "2025-12-09T05:11:17.538056+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:07:04Z\",\n \"avg_ns\": 22675793368,\n \"stddev_ns\": 15101187,\n \"avg_ts\": 22.579151,\n \"stddev_ts\": 0.015033,\n \"samples_ns\": [ 22662849659, 22692384074, 22672146371 ],\n \"samples_ts\": [ 22.592, 22.5626, 22.5828 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:08:34Z\",\n \"avg_ns\": 54191370697,\n \"stddev_ns\": 51985005,\n \"avg_ts\": 9.448004,\n \"stddev_ts\": 0.009059,\n \"samples_ns\": [ 54250247580, 54151804310, 54172060201 ],\n \"samples_ts\": [ 9.43774, 9.4549, 9.45137 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:07:04Z", - "avg_ns": 22675793368, - "stddev_ns": 15101187, - "avg_ts": 22.579151, - "stddev_ts": 0.015033, - "samples_ns": [ - 22662849659, - 22692384074, - 22672146371 - ], - "samples_ts": [ - 22.592, - 22.5626, - 22.5828 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T05:08:34Z", - "avg_ns": 54191370697, - "stddev_ns": 51985005, - "avg_ts": 9.448004, - "stddev_ts": 0.009059, - "samples_ns": [ - 54250247580, - 54151804310, - 54172060201 - ], - "samples_ts": [ - 9.43774, - 9.4549, - 9.45137 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 379 - }, - { - "timestamp_utc": "2025-12-09T05:12:22.705772+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:11:18Z\",\n \"avg_ns\": 5674573496,\n \"stddev_ns\": 13393830,\n \"avg_ts\": 22.556845,\n \"stddev_ts\": 0.053171,\n \"samples_ns\": [ 5689996923, 5667853307, 5665870258 ],\n \"samples_ts\": [ 22.4956, 22.5835, 22.5914 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:11:41Z\",\n \"avg_ns\": 13785577967,\n \"stddev_ns\": 14926363,\n \"avg_ts\": 9.285073,\n \"stddev_ts\": 0.010048,\n \"samples_ns\": [ 13774063052, 13802441791, 13780229058 ],\n \"samples_ts\": [ 9.29283, 9.27372, 9.28867 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:11:18Z", - "avg_ns": 5674573496, - "stddev_ns": 13393830, - "avg_ts": 22.556845, - "stddev_ts": 0.053171, - "samples_ns": [ - 5689996923, - 5667853307, - 5665870258 - ], - "samples_ts": [ - 22.4956, - 22.5835, - 22.5914 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T05:11:41Z", - "avg_ns": 13785577967, - "stddev_ns": 14926363, - "avg_ts": 9.285073, - "stddev_ts": 0.010048, - "samples_ns": [ - 13774063052, - 13802441791, - 13780229058 - ], - "samples_ts": [ - 9.29283, - 9.27372, - 9.28867 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 380 - }, - { - "timestamp_utc": "2025-12-09T05:15:29.368921+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:12:23Z\",\n \"avg_ns\": 5633414452,\n \"stddev_ns\": 645101,\n \"avg_ts\": 22.721566,\n \"stddev_ts\": 0.002567,\n \"samples_ns\": [ 5633655023, 5632692933, 5633895402 ],\n \"samples_ts\": [ 22.7206, 22.7245, 22.7196 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:12:46Z\",\n \"avg_ns\": 54337853495,\n \"stddev_ns\": 26872166,\n \"avg_ts\": 9.422531,\n \"stddev_ts\": 0.004661,\n \"samples_ns\": [ 54307818293, 54359614533, 54346127660 ],\n \"samples_ts\": [ 9.42774, 9.41876, 9.42109 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:12:23Z", - "avg_ns": 5633414452, - "stddev_ns": 645101, - "avg_ts": 22.721566, - "stddev_ts": 0.002567, - "samples_ns": [ - 5633655023, - 5632692933, - 5633895402 - ], - "samples_ts": [ - 22.7206, - 22.7245, - 22.7196 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T05:12:46Z", - "avg_ns": 54337853495, - "stddev_ns": 26872166, - "avg_ts": 9.422531, - "stddev_ts": 0.004661, - "samples_ns": [ - 54307818293, - 54359614533, - 54346127660 - ], - "samples_ts": [ - 9.42774, - 9.41876, - 9.42109 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 381 - }, - { - "timestamp_utc": "2025-12-09T05:17:42.218959+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:15:30Z\",\n \"avg_ns\": 22850012933,\n \"stddev_ns\": 13946588,\n \"avg_ts\": 22.406995,\n \"stddev_ts\": 0.013676,\n \"samples_ns\": [ 22851997031, 22862859568, 22835182202 ],\n \"samples_ts\": [ 22.405, 22.3944, 22.4215 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:17:01Z\",\n \"avg_ns\": 13459569265,\n \"stddev_ns\": 13368231,\n \"avg_ts\": 9.509969,\n \"stddev_ts\": 0.009451,\n \"samples_ns\": [ 13468008524, 13466543106, 13444156165 ],\n \"samples_ts\": [ 9.504, 9.50504, 9.52087 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:15:30Z", - "avg_ns": 22850012933, - "stddev_ns": 13946588, - "avg_ts": 22.406995, - "stddev_ts": 0.013676, - "samples_ns": [ - 22851997031, - 22862859568, - 22835182202 - ], - "samples_ts": [ - 22.405, - 22.3944, - 22.4215 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T05:17:01Z", - "avg_ns": 13459569265, - "stddev_ns": 13368231, - "avg_ts": 9.509969, - "stddev_ts": 0.009451, - "samples_ns": [ - 13468008524, - 13466543106, - 13444156165 - ], - "samples_ts": [ - 9.504, - 9.50504, - 9.52087 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 382 - }, - { - "timestamp_utc": "2025-12-09T05:22:02.355272+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:17:43Z\",\n \"avg_ns\": 23066046656,\n \"stddev_ns\": 4355097,\n \"avg_ts\": 22.197129,\n \"stddev_ts\": 0.004186,\n \"samples_ns\": [ 23070536574, 23065751370, 23061852026 ],\n \"samples_ts\": [ 22.1928, 22.1974, 22.2012 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:19:15Z\",\n \"avg_ns\": 55578316023,\n \"stddev_ns\": 118079402,\n \"avg_ts\": 9.212254,\n \"stddev_ts\": 0.019596,\n \"samples_ns\": [ 55442094948, 55651476113, 55641377010 ],\n \"samples_ts\": [ 9.23486, 9.20012, 9.20179 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:17:43Z", - "avg_ns": 23066046656, - "stddev_ns": 4355097, - "avg_ts": 22.197129, - "stddev_ts": 0.004186, - "samples_ns": [ - 23070536574, - 23065751370, - 23061852026 - ], - "samples_ts": [ - 22.1928, - 22.1974, - 22.2012 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T05:19:15Z", - "avg_ns": 55578316023, - "stddev_ns": 118079402, - "avg_ts": 9.212254, - "stddev_ts": 0.019596, - "samples_ns": [ - 55442094948, - 55651476113, - 55641377010 - ], - "samples_ts": [ - 9.23486, - 9.20012, - 9.20179 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 383 - }, - { - "timestamp_utc": "2025-12-09T05:23:07.885976+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:22:03Z\",\n \"avg_ns\": 5626351728,\n \"stddev_ns\": 14064804,\n \"avg_ts\": 22.750183,\n \"stddev_ts\": 0.056822,\n \"samples_ns\": [ 5614236365, 5623043464, 5641775357 ],\n \"samples_ts\": [ 22.7992, 22.7635, 22.6879 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:22:25Z\",\n \"avg_ns\": 13936294061,\n \"stddev_ns\": 31977288,\n \"avg_ts\": 9.184683,\n \"stddev_ts\": 0.021047,\n \"samples_ns\": [ 13916701816, 13973194189, 13918986180 ],\n \"samples_ts\": [ 9.19758, 9.1604, 9.19607 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:22:03Z", - "avg_ns": 5626351728, - "stddev_ns": 14064804, - "avg_ts": 22.750183, - "stddev_ts": 0.056822, - "samples_ns": [ - 5614236365, - 5623043464, - 5641775357 - ], - "samples_ts": [ - 22.7992, - 22.7635, - 22.6879 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T05:22:25Z", - "avg_ns": 13936294061, - "stddev_ns": 31977288, - "avg_ts": 9.184683, - "stddev_ts": 0.021047, - "samples_ns": [ - 13916701816, - 13973194189, - 13918986180 - ], - "samples_ts": [ - 9.19758, - 9.1604, - 9.19607 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 384 - }, - { - "timestamp_utc": "2025-12-09T05:26:14.647644+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:23:08Z\",\n \"avg_ns\": 5694023335,\n \"stddev_ns\": 3379471,\n \"avg_ts\": 22.479716,\n \"stddev_ts\": 0.013342,\n \"samples_ns\": [ 5690267661, 5694986670, 5696815675 ],\n \"samples_ts\": [ 22.4945, 22.4759, 22.4687 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:23:31Z\",\n \"avg_ns\": 54294057381,\n \"stddev_ns\": 80585107,\n \"avg_ts\": 9.430143,\n \"stddev_ts\": 0.013995,\n \"samples_ns\": [ 54376999549, 54216057502, 54289115093 ],\n \"samples_ts\": [ 9.41575, 9.4437, 9.43099 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:23:08Z", - "avg_ns": 5694023335, - "stddev_ns": 3379471, - "avg_ts": 22.479716, - "stddev_ts": 0.013342, - "samples_ns": [ - 5690267661, - 5694986670, - 5696815675 - ], - "samples_ts": [ - 22.4945, - 22.4759, - 22.4687 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T05:23:31Z", - "avg_ns": 54294057381, - "stddev_ns": 80585107, - "avg_ts": 9.430143, - "stddev_ts": 0.013995, - "samples_ns": [ - 54376999549, - 54216057502, - 54289115093 - ], - "samples_ts": [ - 9.41575, - 9.4437, - 9.43099 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 385 - }, - { - "timestamp_utc": "2025-12-09T05:28:26.655964+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:26:15Z\",\n \"avg_ns\": 22760259105,\n \"stddev_ns\": 29538764,\n \"avg_ts\": 22.495375,\n \"stddev_ts\": 0.029173,\n \"samples_ns\": [ 22745397695, 22741102789, 22794276832 ],\n \"samples_ts\": [ 22.51, 22.5143, 22.4618 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:27:46Z\",\n \"avg_ns\": 13299608361,\n \"stddev_ns\": 49988180,\n \"avg_ts\": 9.624434,\n \"stddev_ts\": 0.036126,\n \"samples_ns\": [ 13354721597, 13257196103, 13286907385 ],\n \"samples_ts\": [ 9.58463, 9.65513, 9.63354 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:26:15Z", - "avg_ns": 22760259105, - "stddev_ns": 29538764, - "avg_ts": 22.495375, - "stddev_ts": 0.029173, - "samples_ns": [ - 22745397695, - 22741102789, - 22794276832 - ], - "samples_ts": [ - 22.51, - 22.5143, - 22.4618 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T05:27:46Z", - "avg_ns": 13299608361, - "stddev_ns": 49988180, - "avg_ts": 9.624434, - "stddev_ts": 0.036126, - "samples_ns": [ - 13354721597, - 13257196103, - 13286907385 - ], - "samples_ts": [ - 9.58463, - 9.65513, - 9.63354 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 386 - }, - { - "timestamp_utc": "2025-12-09T05:32:43.437042+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:28:27Z\",\n \"avg_ns\": 22602460841,\n \"stddev_ns\": 15234487,\n \"avg_ts\": 22.652408,\n \"stddev_ts\": 0.015265,\n \"samples_ns\": [ 22601182204, 22587907458, 22618292863 ],\n \"samples_ts\": [ 22.6537, 22.667, 22.6365 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:29:57Z\",\n \"avg_ns\": 55098963437,\n \"stddev_ns\": 101718167,\n \"avg_ts\": 9.292392,\n \"stddev_ts\": 0.017137,\n \"samples_ns\": [ 55216104079, 55047814910, 55032971323 ],\n \"samples_ts\": [ 9.27266, 9.301, 9.30351 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:28:27Z", - "avg_ns": 22602460841, - "stddev_ns": 15234487, - "avg_ts": 22.652408, - "stddev_ts": 0.015265, - "samples_ns": [ - 22601182204, - 22587907458, - 22618292863 - ], - "samples_ts": [ - 22.6537, - 22.667, - 22.6365 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T05:29:57Z", - "avg_ns": 55098963437, - "stddev_ns": 101718167, - "avg_ts": 9.292392, - "stddev_ts": 0.017137, - "samples_ns": [ - 55216104079, - 55047814910, - 55032971323 - ], - "samples_ts": [ - 9.27266, - 9.301, - 9.30351 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 387 - }, - { - "timestamp_utc": "2025-12-09T05:33:48.614245+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:32:44Z\",\n \"avg_ns\": 5623990396,\n \"stddev_ns\": 5720103,\n \"avg_ts\": 22.759656,\n \"stddev_ts\": 0.023135,\n \"samples_ns\": [ 5619257377, 5622368520, 5630345293 ],\n \"samples_ts\": [ 22.7788, 22.7662, 22.734 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:33:06Z\",\n \"avg_ns\": 13869208494,\n \"stddev_ns\": 32168652,\n \"avg_ts\": 9.229111,\n \"stddev_ts\": 0.021399,\n \"samples_ns\": [ 13838717371, 13866082530, 13902825583 ],\n \"samples_ts\": [ 9.24941, 9.23116, 9.20676 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:32:44Z", - "avg_ns": 5623990396, - "stddev_ns": 5720103, - "avg_ts": 22.759656, - "stddev_ts": 0.023135, - "samples_ns": [ - 5619257377, - 5622368520, - 5630345293 - ], - "samples_ts": [ - 22.7788, - 22.7662, - 22.734 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T05:33:06Z", - "avg_ns": 13869208494, - "stddev_ns": 32168652, - "avg_ts": 9.229111, - "stddev_ts": 0.021399, - "samples_ns": [ - 13838717371, - 13866082530, - 13902825583 - ], - "samples_ts": [ - 9.24941, - 9.23116, - 9.20676 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 388 - }, - { - "timestamp_utc": "2025-12-09T05:36:54.946700+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:33:49Z\",\n \"avg_ns\": 5623098726,\n \"stddev_ns\": 5568078,\n \"avg_ts\": 22.763264,\n \"stddev_ts\": 0.022527,\n \"samples_ns\": [ 5621371381, 5629324718, 5618600081 ],\n \"samples_ts\": [ 22.7702, 22.7381, 22.7815 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:34:11Z\",\n \"avg_ns\": 54255020272,\n \"stddev_ns\": 155444949,\n \"avg_ts\": 9.436966,\n \"stddev_ts\": 0.027049,\n \"samples_ns\": [ 54402146147, 54092414868, 54270499802 ],\n \"samples_ts\": [ 9.41139, 9.46528, 9.43422 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:33:49Z", - "avg_ns": 5623098726, - "stddev_ns": 5568078, - "avg_ts": 22.763264, - "stddev_ts": 0.022527, - "samples_ns": [ - 5621371381, - 5629324718, - 5618600081 - ], - "samples_ts": [ - 22.7702, - 22.7381, - 22.7815 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T05:34:11Z", - "avg_ns": 54255020272, - "stddev_ns": 155444949, - "avg_ts": 9.436966, - "stddev_ts": 0.027049, - "samples_ns": [ - 54402146147, - 54092414868, - 54270499802 - ], - "samples_ts": [ - 9.41139, - 9.46528, - 9.43422 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 389 - }, - { - "timestamp_utc": "2025-12-09T05:39:07.433107+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:36:55Z\",\n \"avg_ns\": 22587244962,\n \"stddev_ns\": 7255240,\n \"avg_ts\": 22.667662,\n \"stddev_ts\": 0.007280,\n \"samples_ns\": [ 22582878829, 22595620054, 22583236003 ],\n \"samples_ts\": [ 22.672, 22.6593, 22.6717 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:38:26Z\",\n \"avg_ns\": 13670229513,\n \"stddev_ns\": 16998156,\n \"avg_ts\": 9.363422,\n \"stddev_ts\": 0.011651,\n \"samples_ns\": [ 13679632581, 13650607846, 13680448113 ],\n \"samples_ts\": [ 9.35698, 9.37687, 9.35642 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:36:55Z", - "avg_ns": 22587244962, - "stddev_ns": 7255240, - "avg_ts": 22.667662, - "stddev_ts": 0.00728, - "samples_ns": [ - 22582878829, - 22595620054, - 22583236003 - ], - "samples_ts": [ - 22.672, - 22.6593, - 22.6717 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T05:38:26Z", - "avg_ns": 13670229513, - "stddev_ns": 16998156, - "avg_ts": 9.363422, - "stddev_ts": 0.011651, - "samples_ns": [ - 13679632581, - 13650607846, - 13680448113 - ], - "samples_ts": [ - 9.35698, - 9.37687, - 9.35642 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 390 - }, - { - "timestamp_utc": "2025-12-09T05:43:26.026610+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:39:08Z\",\n \"avg_ns\": 22555901178,\n \"stddev_ns\": 10533048,\n \"avg_ts\": 22.699163,\n \"stddev_ts\": 0.010600,\n \"samples_ns\": [ 22564262778, 22559366472, 22544074286 ],\n \"samples_ts\": [ 22.6907, 22.6957, 22.7111 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:40:38Z\",\n \"avg_ns\": 55743669100,\n \"stddev_ns\": 53903168,\n \"avg_ts\": 9.184905,\n \"stddev_ts\": 0.008885,\n \"samples_ns\": [ 55787580658, 55759915191, 55683511451 ],\n \"samples_ts\": [ 9.17767, 9.18222, 9.19482 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:39:08Z", - "avg_ns": 22555901178, - "stddev_ns": 10533048, - "avg_ts": 22.699163, - "stddev_ts": 0.0106, - "samples_ns": [ - 22564262778, - 22559366472, - 22544074286 - ], - "samples_ts": [ - 22.6907, - 22.6957, - 22.7111 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T05:40:38Z", - "avg_ns": 55743669100, - "stddev_ns": 53903168, - "avg_ts": 9.184905, - "stddev_ts": 0.008885, - "samples_ns": [ - 55787580658, - 55759915191, - 55683511451 - ], - "samples_ts": [ - 9.17767, - 9.18222, - 9.19482 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 391 - }, - { - "timestamp_utc": "2025-12-09T05:44:31.678442+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:43:26Z\",\n \"avg_ns\": 5676880801,\n \"stddev_ns\": 2870510,\n \"avg_ts\": 22.547597,\n \"stddev_ts\": 0.011404,\n \"samples_ns\": [ 5678933399, 5678108379, 5673600625 ],\n \"samples_ts\": [ 22.5394, 22.5427, 22.5606 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:43:49Z\",\n \"avg_ns\": 13949854511,\n \"stddev_ns\": 30816376,\n \"avg_ts\": 9.175753,\n \"stddev_ts\": 0.020262,\n \"samples_ns\": [ 13921089747, 13946096432, 13982377355 ],\n \"samples_ts\": [ 9.19468, 9.1782, 9.15438 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:43:26Z", - "avg_ns": 5676880801, - "stddev_ns": 2870510, - "avg_ts": 22.547597, - "stddev_ts": 0.011404, - "samples_ns": [ - 5678933399, - 5678108379, - 5673600625 - ], - "samples_ts": [ - 22.5394, - 22.5427, - 22.5606 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T05:43:49Z", - "avg_ns": 13949854511, - "stddev_ns": 30816376, - "avg_ts": 9.175753, - "stddev_ts": 0.020262, - "samples_ns": [ - 13921089747, - 13946096432, - 13982377355 - ], - "samples_ts": [ - 9.19468, - 9.1782, - 9.15438 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 392 - }, - { - "timestamp_utc": "2025-12-09T05:47:43.893065+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:44:32Z\",\n \"avg_ns\": 5638812923,\n \"stddev_ns\": 3133664,\n \"avg_ts\": 22.699818,\n \"stddev_ts\": 0.012615,\n \"samples_ns\": [ 5640179460, 5641030284, 5635229026 ],\n \"samples_ts\": [ 22.6943, 22.6909, 22.7142 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:44:55Z\",\n \"avg_ns\": 56184411836,\n \"stddev_ns\": 120353193,\n \"avg_ts\": 9.112876,\n \"stddev_ts\": 0.019497,\n \"samples_ns\": [ 56322865550, 56125565823, 56104804136 ],\n \"samples_ts\": [ 9.09045, 9.1224, 9.12578 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:44:32Z", - "avg_ns": 5638812923, - "stddev_ns": 3133664, - "avg_ts": 22.699818, - "stddev_ts": 0.012615, - "samples_ns": [ - 5640179460, - 5641030284, - 5635229026 - ], - "samples_ts": [ - 22.6943, - 22.6909, - 22.7142 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T05:44:55Z", - "avg_ns": 56184411836, - "stddev_ns": 120353193, - "avg_ts": 9.112876, - "stddev_ts": 0.019497, - "samples_ns": [ - 56322865550, - 56125565823, - 56104804136 - ], - "samples_ts": [ - 9.09045, - 9.1224, - 9.12578 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 393 - }, - { - "timestamp_utc": "2025-12-09T05:49:57.825604+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:47:44Z\",\n \"avg_ns\": 22835260340,\n \"stddev_ns\": 10417902,\n \"avg_ts\": 22.421469,\n \"stddev_ts\": 0.010225,\n \"samples_ns\": [ 22831591690, 22827175353, 22847013979 ],\n \"samples_ts\": [ 22.4251, 22.4294, 22.4099 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:49:16Z\",\n \"avg_ns\": 13816441810,\n \"stddev_ns\": 22694535,\n \"avg_ts\": 9.264341,\n \"stddev_ts\": 0.015230,\n \"samples_ns\": [ 13832570272, 13790491304, 13826263856 ],\n \"samples_ts\": [ 9.25352, 9.28176, 9.25774 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:47:44Z", - "avg_ns": 22835260340, - "stddev_ns": 10417902, - "avg_ts": 22.421469, - "stddev_ts": 0.010225, - "samples_ns": [ - 22831591690, - 22827175353, - 22847013979 - ], - "samples_ts": [ - 22.4251, - 22.4294, - 22.4099 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T05:49:16Z", - "avg_ns": 13816441810, - "stddev_ns": 22694535, - "avg_ts": 9.264341, - "stddev_ts": 0.01523, - "samples_ns": [ - 13832570272, - 13790491304, - 13826263856 - ], - "samples_ts": [ - 9.25352, - 9.28176, - 9.25774 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 394 - }, - { - "timestamp_utc": "2025-12-09T05:54:13.755542+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:49:58Z\",\n \"avg_ns\": 23074927326,\n \"stddev_ns\": 14434082,\n \"avg_ts\": 22.188591,\n \"stddev_ts\": 0.013881,\n \"samples_ns\": [ 23088140051, 23077117985, 23059523943 ],\n \"samples_ts\": [ 22.1759, 22.1865, 22.2034 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:51:31Z\",\n \"avg_ns\": 54170707927,\n \"stddev_ns\": 16471228,\n \"avg_ts\": 9.451603,\n \"stddev_ts\": 0.002874,\n \"samples_ns\": [ 54153184837, 54185868940, 54173070005 ],\n \"samples_ts\": [ 9.45466, 9.44896, 9.45119 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:49:58Z", - "avg_ns": 23074927326, - "stddev_ns": 14434082, - "avg_ts": 22.188591, - "stddev_ts": 0.013881, - "samples_ns": [ - 23088140051, - 23077117985, - 23059523943 - ], - "samples_ts": [ - 22.1759, - 22.1865, - 22.2034 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T05:51:31Z", - "avg_ns": 54170707927, - "stddev_ns": 16471228, - "avg_ts": 9.451603, - "stddev_ts": 0.002874, - "samples_ns": [ - 54153184837, - 54185868940, - 54173070005 - ], - "samples_ts": [ - 9.45466, - 9.44896, - 9.45119 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 395 - }, - { - "timestamp_utc": "2025-12-09T05:54:48.376251+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:54:14Z\",\n \"avg_ns\": 2883255397,\n \"stddev_ns\": 5702666,\n \"avg_ts\": 44.394379,\n \"stddev_ts\": 0.087838,\n \"samples_ns\": [ 2888568957, 2883966845, 2877230389 ],\n \"samples_ts\": [ 44.3126, 44.3833, 44.4872 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:54:26Z\",\n \"avg_ns\": 7352639988,\n \"stddev_ns\": 19422153,\n \"avg_ts\": 17.408794,\n \"stddev_ts\": 0.045989,\n \"samples_ns\": [ 7352945554, 7333066854, 7371907556 ],\n \"samples_ts\": [ 17.408, 17.4552, 17.3632 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:54:14Z", - "avg_ns": 2883255397, - "stddev_ns": 5702666, - "avg_ts": 44.394379, - "stddev_ts": 0.087838, - "samples_ns": [ - 2888568957, - 2883966845, - 2877230389 - ], - "samples_ts": [ - 44.3126, - 44.3833, - 44.4872 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T05:54:26Z", - "avg_ns": 7352639988, - "stddev_ns": 19422153, - "avg_ts": 17.408794, - "stddev_ts": 0.045989, - "samples_ns": [ - 7352945554, - 7333066854, - 7371907556 - ], - "samples_ts": [ - 17.408, - 17.4552, - 17.3632 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 396 - }, - { - "timestamp_utc": "2025-12-09T05:56:34.318488+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:54:49Z\",\n \"avg_ns\": 3259899640,\n \"stddev_ns\": 444547032,\n \"avg_ts\": 39.736575,\n \"stddev_ts\": 5.197966,\n \"samples_ns\": [ 2882896965, 3146699467, 3750102488 ],\n \"samples_ts\": [ 44.3998, 40.6775, 34.1324 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:55:01Z\",\n \"avg_ns\": 30734464495,\n \"stddev_ns\": 1227690565,\n \"avg_ts\": 16.676279,\n \"stddev_ts\": 0.655606,\n \"samples_ns\": [ 32104939810, 29735264765, 30363188910 ],\n \"samples_ts\": [ 15.9477, 17.2186, 16.8625 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:54:49Z", - "avg_ns": 3259899640, - "stddev_ns": 444547032, - "avg_ts": 39.736575, - "stddev_ts": 5.197966, - "samples_ns": [ - 2882896965, - 3146699467, - 3750102488 - ], - "samples_ts": [ - 44.3998, - 40.6775, - 34.1324 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T05:55:01Z", - "avg_ns": 30734464495, - "stddev_ns": 1227690565, - "avg_ts": 16.676279, - "stddev_ts": 0.655606, - "samples_ns": [ - 32104939810, - 29735264765, - 30363188910 - ], - "samples_ts": [ - 15.9477, - 17.2186, - 16.8625 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 397 - }, - { - "timestamp_utc": "2025-12-09T06:00:15.109138+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:56:35Z\",\n \"avg_ns\": 40940834623,\n \"stddev_ns\": 141230461,\n \"avg_ts\": 12.505951,\n \"stddev_ts\": 0.043058,\n \"samples_ns\": [ 40872278344, 41103257329, 40846968197 ],\n \"samples_ts\": [ 12.5268, 12.4564, 12.5346 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:59:11Z\",\n \"avg_ns\": 21052041564,\n \"stddev_ns\": 2734293586,\n \"avg_ts\": 8.701940,\n \"stddev_ts\": 7.089982,\n \"samples_ns\": [ 28474794652, 27101844373, 7579485669 ],\n \"samples_ts\": [ 4.4952, 4.72293, 16.8877 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T05:56:35Z", - "avg_ns": 40940834623, - "stddev_ns": 141230461, - "avg_ts": 12.505951, - "stddev_ts": 0.043058, - "samples_ns": [ - 40872278344, - 41103257329, - 40846968197 - ], - "samples_ts": [ - 12.5268, - 12.4564, - 12.5346 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T05:59:11Z", - "avg_ns": 21052041564, - "stddev_ns": 2734293586, - "avg_ts": 8.70194, - "stddev_ts": 7.089982, - "samples_ns": [ - 28474794652, - 27101844373, - 7579485669 - ], - "samples_ts": [ - 4.4952, - 4.72293, - 16.8877 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 398 - }, - { - "timestamp_utc": "2025-12-09T06:05:26.731507+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:00:16Z\",\n \"avg_ns\": 11602433222,\n \"stddev_ns\": 6155895,\n \"avg_ts\": 44.128683,\n \"stddev_ts\": 0.023407,\n \"samples_ns\": [ 11596176257, 11608478832, 11602644579 ],\n \"samples_ts\": [ 44.1525, 44.1057, 44.1279 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:01:02Z\",\n \"avg_ns\": 88055571909,\n \"stddev_ns\": 1296867326,\n \"avg_ts\": 6.215235,\n \"stddev_ts\": 2.000845,\n \"samples_ns\": [ 88506057247, 114680891262, 60979767219 ],\n \"samples_ts\": [ 5.78491, 4.46456, 8.39623 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T06:00:16Z", - "avg_ns": 11602433222, - "stddev_ns": 6155895, - "avg_ts": 44.128683, - "stddev_ts": 0.023407, - "samples_ns": [ - 11596176257, - 11608478832, - 11602644579 - ], - "samples_ts": [ - 44.1525, - 44.1057, - 44.1279 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T06:01:02Z", - "avg_ns": 88055571909, - "stddev_ns": 1296867326, - "avg_ts": 6.215235, - "stddev_ts": 2.000845, - "samples_ns": [ - 88506057247, - 114680891262, - 60979767219 - ], - "samples_ts": [ - 5.78491, - 4.46456, - 8.39623 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 399 - }, - { - "timestamp_utc": "2025-12-09T06:06:01.215623+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:05:27Z\",\n \"avg_ns\": 2871898505,\n \"stddev_ns\": 4144878,\n \"avg_ts\": 44.569882,\n \"stddev_ts\": 0.064316,\n \"samples_ns\": [ 2871620151, 2876175545, 2867899819 ],\n \"samples_ts\": [ 44.5741, 44.5035, 44.632 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:05:39Z\",\n \"avg_ns\": 7316979933,\n \"stddev_ns\": 23797464,\n \"avg_ts\": 17.493680,\n \"stddev_ts\": 0.056934,\n \"samples_ns\": [ 7291670446, 7320367689, 7338901666 ],\n \"samples_ts\": [ 17.5543, 17.4855, 17.4413 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T06:05:27Z", - "avg_ns": 2871898505, - "stddev_ns": 4144878, - "avg_ts": 44.569882, - "stddev_ts": 0.064316, - "samples_ns": [ - 2871620151, - 2876175545, - 2867899819 - ], - "samples_ts": [ - 44.5741, - 44.5035, - 44.632 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T06:05:39Z", - "avg_ns": 7316979933, - "stddev_ns": 23797464, - "avg_ts": 17.49368, - "stddev_ts": 0.056934, - "samples_ns": [ - 7291670446, - 7320367689, - 7338901666 - ], - "samples_ts": [ - 17.5543, - 17.4855, - 17.4413 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 400 - }, - { - "timestamp_utc": "2025-12-09T06:10:35.991558+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:06:02Z\",\n \"avg_ns\": 4122652119,\n \"stddev_ns\": 1211337054,\n \"avg_ts\": 32.751606,\n \"stddev_ts\": 8.774919,\n \"samples_ns\": [ 3139963666, 3751975463, 5476017230 ],\n \"samples_ts\": [ 40.7648, 34.1154, 23.3747 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:06:17Z\",\n \"avg_ns\": 86082915272,\n \"stddev_ns\": 2470454459,\n \"avg_ts\": 8.277975,\n \"stddev_ts\": 6.481674,\n \"samples_ns\": [ 113515071663, 112251125043, 32482549111 ],\n \"samples_ts\": [ 4.51041, 4.5612, 15.7623 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T06:06:02Z", - "avg_ns": 4122652119, - "stddev_ns": 1211337054, - "avg_ts": 32.751606, - "stddev_ts": 8.774919, - "samples_ns": [ - 3139963666, - 3751975463, - 5476017230 - ], - "samples_ts": [ - 40.7648, - 34.1154, - 23.3747 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T06:06:17Z", - "avg_ns": 86082915272, - "stddev_ns": 2470454459, - "avg_ts": 8.277975, - "stddev_ts": 6.481674, - "samples_ns": [ - 113515071663, - 112251125043, - 32482549111 - ], - "samples_ts": [ - 4.51041, - 4.5612, - 15.7623 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 401 - }, - { - "timestamp_utc": "2025-12-09T06:13:15.069563+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:10:36Z\",\n \"avg_ns\": 20610458940,\n \"stddev_ns\": 1816513687,\n \"avg_ts\": 32.392718,\n \"stddev_ts\": 16.362703,\n \"samples_ns\": [ 11506491851, 13160265841, 37164619129 ],\n \"samples_ts\": [ 44.4966, 38.905, 13.7765 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:11:50Z\",\n \"avg_ns\": 28126029231,\n \"stddev_ns\": 91209794,\n \"avg_ts\": 4.550976,\n \"stddev_ts\": 0.014764,\n \"samples_ns\": [ 28031619962, 28213660809, 28132806924 ],\n \"samples_ts\": [ 4.56627, 4.53681, 4.54985 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T06:10:36Z", - "avg_ns": 20610458940, - "stddev_ns": 1816513687, - "avg_ts": 32.392718, - "stddev_ts": 16.362703, - "samples_ns": [ - 11506491851, - 13160265841, - 37164619129 - ], - "samples_ts": [ - 44.4966, - 38.905, - 13.7765 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T06:11:50Z", - "avg_ns": 28126029231, - "stddev_ns": 91209794, - "avg_ts": 4.550976, - "stddev_ts": 0.014764, - "samples_ns": [ - 28031619962, - 28213660809, - 28132806924 - ], - "samples_ts": [ - 4.56627, - 4.53681, - 4.54985 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 402 - }, - { - "timestamp_utc": "2025-12-09T06:18:37.944943+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:13:16Z\",\n \"avg_ns\": 28189729075,\n \"stddev_ns\": 1928625124,\n \"avg_ts\": 24.301697,\n \"stddev_ts\": 17.593082,\n \"samples_ns\": [ 40545644820, 32527427783, 11496114623 ],\n \"samples_ts\": [ 12.6277, 15.7406, 44.5368 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:15:21Z\",\n \"avg_ns\": 65352010894,\n \"stddev_ns\": 3778172106,\n \"avg_ts\": 10.429055,\n \"stddev_ts\": 6.404579,\n \"samples_ns\": [ 29665361267, 54067066993, 112323604423 ],\n \"samples_ts\": [ 17.2592, 9.46972, 4.55826 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T06:13:16Z", - "avg_ns": 28189729075, - "stddev_ns": 1928625124, - "avg_ts": 24.301697, - "stddev_ts": 17.593082, - "samples_ns": [ - 40545644820, - 32527427783, - 11496114623 - ], - "samples_ts": [ - 12.6277, - 15.7406, - 44.5368 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T06:15:21Z", - "avg_ns": 65352010894, - "stddev_ns": 3778172106, - "avg_ts": 10.429055, - "stddev_ts": 6.404579, - "samples_ns": [ - 29665361267, - 54067066993, - 112323604423 - ], - "samples_ts": [ - 17.2592, - 9.46972, - 4.55826 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 403 - }, - { - "timestamp_utc": "2025-12-09T06:20:16.866030+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:18:39Z\",\n \"avg_ns\": 10205395574,\n \"stddev_ns\": 62110548,\n \"avg_ts\": 12.542695,\n \"stddev_ts\": 0.076380,\n \"samples_ns\": [ 10209236922, 10265496294, 10141453506 ],\n \"samples_ts\": [ 12.5377, 12.469, 12.6215 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:19:20Z\",\n \"avg_ns\": 18702787720,\n \"stddev_ns\": 4066184215,\n \"avg_ts\": 9.422791,\n \"stddev_ts\": 7.108428,\n \"samples_ns\": [ 27711485199, 21119428920, 7277449041 ],\n \"samples_ts\": [ 4.61902, 6.06077, 17.5886 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T06:18:39Z", - "avg_ns": 10205395574, - "stddev_ns": 62110548, - "avg_ts": 12.542695, - "stddev_ts": 0.07638, - "samples_ns": [ - 10209236922, - 10265496294, - 10141453506 - ], - "samples_ts": [ - 12.5377, - 12.469, - 12.6215 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T06:19:20Z", - "avg_ns": 18702787720, - "stddev_ns": 4066184215, - "avg_ts": 9.422791, - "stddev_ts": 7.108428, - "samples_ns": [ - 27711485199, - 21119428920, - 7277449041 - ], - "samples_ts": [ - 4.61902, - 6.06077, - 17.5886 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 404 - }, - { - "timestamp_utc": "2025-12-09T06:24:06.865596+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:20:17Z\",\n \"avg_ns\": 2920993486,\n \"stddev_ns\": 3545928,\n \"avg_ts\": 43.820750,\n \"stddev_ts\": 0.053159,\n \"samples_ns\": [ 2925085433, 2918822655, 2919072370 ],\n \"samples_ts\": [ 43.7594, 43.8533, 43.8495 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:20:29Z\",\n \"avg_ns\": 72395097171,\n \"stddev_ns\": 1867162442,\n \"avg_ts\": 9.514526,\n \"stddev_ts\": 6.758249,\n \"samples_ns\": [ 29729751574, 76187306810, 111268233131 ],\n \"samples_ts\": [ 17.2218, 6.72028, 4.60149 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T06:20:17Z", - "avg_ns": 2920993486, - "stddev_ns": 3545928, - "avg_ts": 43.82075, - "stddev_ts": 0.053159, - "samples_ns": [ - 2925085433, - 2918822655, - 2919072370 - ], - "samples_ts": [ - 43.7594, - 43.8533, - 43.8495 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T06:20:29Z", - "avg_ns": 72395097171, - "stddev_ns": 1867162442, - "avg_ts": 9.514526, - "stddev_ts": 6.758249, - "samples_ns": [ - 29729751574, - 76187306810, - 111268233131 - ], - "samples_ts": [ - 17.2218, - 6.72028, - 4.60149 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 405 - }, - { - "timestamp_utc": "2025-12-09T06:25:59.108130+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:24:08Z\",\n \"avg_ns\": 15904898938,\n \"stddev_ns\": 451823041,\n \"avg_ts\": 36.383130,\n \"stddev_ts\": 13.418433,\n \"samples_ns\": [ 24510661216, 11606256891, 11597778709 ],\n \"samples_ts\": [ 20.8889, 44.1141, 44.1464 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:25:36Z\",\n \"avg_ns\": 7394265286,\n \"stddev_ns\": 15662411,\n \"avg_ts\": 17.310764,\n \"stddev_ts\": 0.036711,\n \"samples_ns\": [ 7404424581, 7402143081, 7376228197 ],\n \"samples_ts\": [ 17.287, 17.2923, 17.353 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T06:24:08Z", - "avg_ns": 15904898938, - "stddev_ns": 451823041, - "avg_ts": 36.38313, - "stddev_ts": 13.418433, - "samples_ns": [ - 24510661216, - 11606256891, - 11597778709 - ], - "samples_ts": [ - 20.8889, - 44.1141, - 44.1464 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T06:25:36Z", - "avg_ns": 7394265286, - "stddev_ns": 15662411, - "avg_ts": 17.310764, - "stddev_ts": 0.036711, - "samples_ns": [ - 7404424581, - 7402143081, - 7376228197 - ], - "samples_ts": [ - 17.287, - 17.2923, - 17.353 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 406 - }, - { - "timestamp_utc": "2025-12-09T06:31:06.662052+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:25:59Z\",\n \"avg_ns\": 39478800835,\n \"stddev_ns\": 1844651962,\n \"avg_ts\": 12.988414,\n \"stddev_ts\": 0.623695,\n \"samples_ns\": [ 37349016953, 40571192687, 40516192867 ],\n \"samples_ts\": [ 13.7085, 12.6198, 12.6369 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:28:11Z\",\n \"avg_ns\": 58148490410,\n \"stddev_ns\": 4018641954,\n \"avg_ts\": 12.608545,\n \"stddev_ts\": 7.002434,\n \"samples_ns\": [ 112892617453, 31494710816, 30058142961 ],\n \"samples_ts\": [ 4.53528, 16.2567, 17.0337 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T06:25:59Z", - "avg_ns": 39478800835, - "stddev_ns": 1844651962, - "avg_ts": 12.988414, - "stddev_ts": 0.623695, - "samples_ns": [ - 37349016953, - 40571192687, - 40516192867 - ], - "samples_ts": [ - 13.7085, - 12.6198, - 12.6369 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T06:28:11Z", - "avg_ns": 58148490410, - "stddev_ns": 4018641954, - "avg_ts": 12.608545, - "stddev_ts": 7.002434, - "samples_ns": [ - 112892617453, - 31494710816, - 30058142961 - ], - "samples_ts": [ - 4.53528, - 16.2567, - 17.0337 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 407 - }, - { - "timestamp_utc": "2025-12-09T06:32:59.804880+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:31:07Z\",\n \"avg_ns\": 8213943819,\n \"stddev_ns\": 4129847776,\n \"avg_ts\": 17.228464,\n \"stddev_ts\": 7.253413,\n \"samples_ns\": [ 5001800943, 9513626282, 10126404234 ],\n \"samples_ts\": [ 25.5908, 13.4544, 12.6402 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:31:35Z\",\n \"avg_ns\": 27812330358,\n \"stddev_ns\": 311125352,\n \"avg_ts\": 4.602661,\n \"stddev_ts\": 0.051658,\n \"samples_ns\": [ 28087383921, 27474658284, 27874948869 ],\n \"samples_ts\": [ 4.55721, 4.65884, 4.59194 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T06:31:07Z", - "avg_ns": 8213943819, - "stddev_ns": 4129847776, - "avg_ts": 17.228464, - "stddev_ts": 7.253413, - "samples_ns": [ - 5001800943, - 9513626282, - 10126404234 - ], - "samples_ts": [ - 25.5908, - 13.4544, - 12.6402 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T06:31:35Z", - "avg_ns": 27812330358, - "stddev_ns": 311125352, - "avg_ts": 4.602661, - "stddev_ts": 0.051658, - "samples_ns": [ - 28087383921, - 27474658284, - 27874948869 - ], - "samples_ts": [ - 4.55721, - 4.65884, - 4.59194 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 408 - }, - { - "timestamp_utc": "2025-12-09T06:36:28.679154+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:33:01Z\",\n \"avg_ns\": 10216413873,\n \"stddev_ns\": 30231237,\n \"avg_ts\": 12.528931,\n \"stddev_ts\": 0.037133,\n \"samples_ns\": [ 10181776732, 10237491312, 10229973575 ],\n \"samples_ts\": [ 12.5715, 12.5031, 12.5123 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:33:42Z\",\n \"avg_ns\": 55236326888,\n \"stddev_ns\": 772752475,\n \"avg_ts\": 11.644460,\n \"stddev_ts\": 5.915739,\n \"samples_ns\": [ 94325905110, 29759217583, 41623857972 ],\n \"samples_ts\": [ 5.42799, 17.2048, 12.3006 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T06:33:01Z", - "avg_ns": 10216413873, - "stddev_ns": 30231237, - "avg_ts": 12.528931, - "stddev_ts": 0.037133, - "samples_ns": [ - 10181776732, - 10237491312, - 10229973575 - ], - "samples_ts": [ - 12.5715, - 12.5031, - 12.5123 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T06:33:42Z", - "avg_ns": 55236326888, - "stddev_ns": 772752475, - "avg_ts": 11.64446, - "stddev_ts": 5.915739, - "samples_ns": [ - 94325905110, - 29759217583, - 41623857972 - ], - "samples_ts": [ - 5.42799, - 17.2048, - 12.3006 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 409 - }, - { - "timestamp_utc": "2025-12-09T06:40:15.628259+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:36:30Z\",\n \"avg_ns\": 41207787974,\n \"stddev_ns\": 89841550,\n \"avg_ts\": 12.424875,\n \"stddev_ts\": 0.027120,\n \"samples_ns\": [ 41105017458, 41246918846, 41271427618 ],\n \"samples_ts\": [ 12.4559, 12.413, 12.4057 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:39:15Z\",\n \"avg_ns\": 20014672879,\n \"stddev_ns\": 1464175909,\n \"avg_ts\": 9.047843,\n \"stddev_ts\": 7.237191,\n \"samples_ns\": [ 27713485190, 24974004676, 7356528772 ],\n \"samples_ts\": [ 4.61869, 5.12533, 17.3995 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T06:36:30Z", - "avg_ns": 41207787974, - "stddev_ns": 89841550, - "avg_ts": 12.424875, - "stddev_ts": 0.02712, - "samples_ns": [ - 41105017458, - 41246918846, - 41271427618 - ], - "samples_ts": [ - 12.4559, - 12.413, - 12.4057 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T06:39:15Z", - "avg_ns": 20014672879, - "stddev_ns": 1464175909, - "avg_ts": 9.047843, - "stddev_ts": 7.237191, - "samples_ns": [ - 27713485190, - 24974004676, - 7356528772 - ], - "samples_ts": [ - 4.61869, - 5.12533, - 17.3995 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 410 - }, - { - "timestamp_utc": "2025-12-09T06:45:24.908041+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:40:16Z\",\n \"avg_ns\": 11610334195,\n \"stddev_ns\": 7602695,\n \"avg_ts\": 44.098657,\n \"stddev_ts\": 0.028876,\n \"samples_ns\": [ 11618097695, 11602903216, 11610001674 ],\n \"samples_ts\": [ 44.0692, 44.1269, 44.0999 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:41:02Z\",\n \"avg_ns\": 87268438617,\n \"stddev_ns\": 3841957043,\n \"avg_ts\": 6.198187,\n \"stddev_ts\": 1.786462,\n \"samples_ns\": [ 86233812869, 112367321952, 63204181032 ],\n \"samples_ts\": [ 5.93735, 4.55648, 8.10073 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T06:40:16Z", - "avg_ns": 11610334195, - "stddev_ns": 7602695, - "avg_ts": 44.098657, - "stddev_ts": 0.028876, - "samples_ns": [ - 11618097695, - 11602903216, - 11610001674 - ], - "samples_ts": [ - 44.0692, - 44.1269, - 44.0999 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T06:41:02Z", - "avg_ns": 87268438617, - "stddev_ns": 3841957043, - "avg_ts": 6.198187, - "stddev_ts": 1.786462, - "samples_ns": [ - 86233812869, - 112367321952, - 63204181032 - ], - "samples_ts": [ - 5.93735, - 4.55648, - 8.10073 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 411 - }, - { - "timestamp_utc": "2025-12-09T06:45:59.461615+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:45:25Z\",\n \"avg_ns\": 2873007048,\n \"stddev_ns\": 1304166,\n \"avg_ts\": 44.552629,\n \"stddev_ts\": 0.020204,\n \"samples_ns\": [ 2872685488, 2871894869, 2874440788 ],\n \"samples_ts\": [ 44.5576, 44.5699, 44.5304 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:45:37Z\",\n \"avg_ns\": 7342721631,\n \"stddev_ns\": 30741612,\n \"avg_ts\": 17.432432,\n \"stddev_ts\": 0.073045,\n \"samples_ns\": [ 7346861933, 7371183030, 7310119932 ],\n \"samples_ts\": [ 17.4224, 17.3649, 17.51 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T06:45:25Z", - "avg_ns": 2873007048, - "stddev_ns": 1304166, - "avg_ts": 44.552629, - "stddev_ts": 0.020204, - "samples_ns": [ - 2872685488, - 2871894869, - 2874440788 - ], - "samples_ts": [ - 44.5576, - 44.5699, - 44.5304 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T06:45:37Z", - "avg_ns": 7342721631, - "stddev_ns": 30741612, - "avg_ts": 17.432432, - "stddev_ts": 0.073045, - "samples_ns": [ - 7346861933, - 7371183030, - 7310119932 - ], - "samples_ts": [ - 17.4224, - 17.3649, - 17.51 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 412 - }, - { - "timestamp_utc": "2025-12-09T06:50:33.723279+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:46:00Z\",\n \"avg_ns\": 3515636988,\n \"stddev_ns\": 686716071,\n \"avg_ts\": 37.339961,\n \"stddev_ts\": 7.195355,\n \"samples_ns\": [ 2870109709, 3439590074, 4237211182 ],\n \"samples_ts\": [ 44.5976, 37.2137, 30.2085 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:46:13Z\",\n \"avg_ns\": 86581877261,\n \"stddev_ns\": 2156113600,\n \"avg_ts\": 7.509895,\n \"stddev_ts\": 4.986786,\n \"samples_ns\": [ 109165838819, 111989936087, 38589856877 ],\n \"samples_ts\": [ 4.69011, 4.57184, 13.2677 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T06:46:00Z", - "avg_ns": 3515636988, - "stddev_ns": 686716071, - "avg_ts": 37.339961, - "stddev_ts": 7.195355, - "samples_ns": [ - 2870109709, - 3439590074, - 4237211182 - ], - "samples_ts": [ - 44.5976, - 37.2137, - 30.2085 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T06:46:13Z", - "avg_ns": 86581877261, - "stddev_ns": 2156113600, - "avg_ts": 7.509895, - "stddev_ts": 4.986786, - "samples_ns": [ - 109165838819, - 111989936087, - 38589856877 - ], - "samples_ts": [ - 4.69011, - 4.57184, - 13.2677 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 413 - }, - { - "timestamp_utc": "2025-12-09T06:53:06.541736+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:50:34Z\",\n \"avg_ns\": 18578381672,\n \"stddev_ns\": 4150273218,\n \"avg_ts\": 34.213090,\n \"stddev_ts\": 15.849277,\n \"samples_ns\": [ 11533323837, 12105713027, 32096108153 ],\n \"samples_ts\": [ 44.3931, 42.2941, 15.9521 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:51:41Z\",\n \"avg_ns\": 28091945930,\n \"stddev_ns\": 320259659,\n \"avg_ts\": 4.556862,\n \"stddev_ts\": 0.052046,\n \"samples_ns\": [ 28393926192, 27756098583, 28125813015 ],\n \"samples_ts\": [ 4.50801, 4.6116, 4.55098 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T06:50:34Z", - "avg_ns": 18578381672, - "stddev_ns": 4150273218, - "avg_ts": 34.21309, - "stddev_ts": 15.849277, - "samples_ns": [ - 11533323837, - 12105713027, - 32096108153 - ], - "samples_ts": [ - 44.3931, - 42.2941, - 15.9521 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T06:51:41Z", - "avg_ns": 28091945930, - "stddev_ns": 320259659, - "avg_ts": 4.556862, - "stddev_ts": 0.052046, - "samples_ns": [ - 28393926192, - 27756098583, - 28125813015 - ], - "samples_ts": [ - 4.50801, - 4.6116, - 4.55098 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 414 - }, - { - "timestamp_utc": "2025-12-09T06:58:29.843445+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:53:08Z\",\n \"avg_ns\": 30204718373,\n \"stddev_ns\": 2183368718,\n \"avg_ts\": 23.463347,\n \"stddev_ts\": 18.208521,\n \"samples_ns\": [ 40491956110, 38612891666, 11509307344 ],\n \"samples_ts\": [ 12.6445, 13.2598, 44.4857 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:55:19Z\",\n \"avg_ns\": 63404550032,\n \"stddev_ns\": 3955404398,\n \"avg_ts\": 10.790338,\n \"stddev_ts\": 6.319767,\n \"samples_ns\": [ 29767761519, 48270356801, 112175531777 ],\n \"samples_ts\": [ 17.1998, 10.6069, 4.56428 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T06:53:08Z", - "avg_ns": 30204718373, - "stddev_ns": 2183368718, - "avg_ts": 23.463347, - "stddev_ts": 18.208521, - "samples_ns": [ - 40491956110, - 38612891666, - 11509307344 - ], - "samples_ts": [ - 12.6445, - 13.2598, - 44.4857 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T06:55:19Z", - "avg_ns": 63404550032, - "stddev_ns": 3955404398, - "avg_ts": 10.790338, - "stddev_ts": 6.319767, - "samples_ns": [ - 29767761519, - 48270356801, - 112175531777 - ], - "samples_ts": [ - 17.1998, - 10.6069, - 4.56428 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 415 - }, - { - "timestamp_utc": "2025-12-09T07:00:14.865705+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:58:31Z\",\n \"avg_ns\": 10193073651,\n \"stddev_ns\": 44933482,\n \"avg_ts\": 12.557709,\n \"stddev_ts\": 0.055249,\n \"samples_ns\": [ 10243570012, 10178149997, 10157500945 ],\n \"samples_ts\": [ 12.4956, 12.576, 12.6015 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:59:12Z\",\n \"avg_ns\": 20758438533,\n \"stddev_ns\": 2265934488,\n \"avg_ts\": 8.887257,\n \"stddev_ts\": 7.314085,\n \"samples_ns\": [ 27797295672, 27093071829, 7384948099 ],\n \"samples_ts\": [ 4.60476, 4.72446, 17.3326 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T06:58:31Z", - "avg_ns": 10193073651, - "stddev_ns": 44933482, - "avg_ts": 12.557709, - "stddev_ts": 0.055249, - "samples_ns": [ - 10243570012, - 10178149997, - 10157500945 - ], - "samples_ts": [ - 12.4956, - 12.576, - 12.6015 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T06:59:12Z", - "avg_ns": 20758438533, - "stddev_ns": 2265934488, - "avg_ts": 8.887257, - "stddev_ts": 7.314085, - "samples_ns": [ - 27797295672, - 27093071829, - 7384948099 - ], - "samples_ts": [ - 4.60476, - 4.72446, - 17.3326 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 416 - }, - { - "timestamp_utc": "2025-12-09T07:03:59.668246+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:00:15Z\",\n \"avg_ns\": 2884652269,\n \"stddev_ns\": 5727586,\n \"avg_ts\": 44.372883,\n \"stddev_ts\": 0.088204,\n \"samples_ns\": [ 2878044643, 2888200435, 2887711729 ],\n \"samples_ts\": [ 44.4746, 44.3183, 44.3258 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:00:27Z\",\n \"avg_ns\": 70721140855,\n \"stddev_ns\": 4210915805,\n \"avg_ts\": 9.685576,\n \"stddev_ts\": 6.640001,\n \"samples_ns\": [ 29792655753, 69989788932, 112380977880 ],\n \"samples_ts\": [ 17.1854, 7.31535, 4.55593 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T07:00:15Z", - "avg_ns": 2884652269, - "stddev_ns": 5727586, - "avg_ts": 44.372883, - "stddev_ts": 0.088204, - "samples_ns": [ - 2878044643, - 2888200435, - 2887711729 - ], - "samples_ts": [ - 44.4746, - 44.3183, - 44.3258 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T07:00:27Z", - "avg_ns": 70721140855, - "stddev_ns": 4210915805, - "avg_ts": 9.685576, - "stddev_ts": 6.640001, - "samples_ns": [ - 29792655753, - 69989788932, - 112380977880 - ], - "samples_ts": [ - 17.1854, - 7.31535, - 4.55593 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 417 - }, - { - "timestamp_utc": "2025-12-09T07:05:57.275575+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:04:01Z\",\n \"avg_ns\": 17628087376,\n \"stddev_ns\": 3939361678,\n \"avg_ts\": 35.099825,\n \"stddev_ts\": 15.424697,\n \"samples_ns\": [ 29614316109, 11629373805, 11640572216 ],\n \"samples_ts\": [ 17.2889, 44.0264, 43.9841 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:05:34Z\",\n \"avg_ns\": 7419055616,\n \"stddev_ns\": 18634463,\n \"avg_ts\": 17.252942,\n \"stddev_ts\": 0.043348,\n \"samples_ns\": [ 7436792227, 7420736619, 7399638004 ],\n \"samples_ts\": [ 17.2117, 17.249, 17.2981 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T07:04:01Z", - "avg_ns": 17628087376, - "stddev_ns": 3939361678, - "avg_ts": 35.099825, - "stddev_ts": 15.424697, - "samples_ns": [ - 29614316109, - 11629373805, - 11640572216 - ], - "samples_ts": [ - 17.2889, - 44.0264, - 43.9841 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T07:05:34Z", - "avg_ns": 7419055616, - "stddev_ns": 18634463, - "avg_ts": 17.252942, - "stddev_ts": 0.043348, - "samples_ns": [ - 7436792227, - 7420736619, - 7399638004 - ], - "samples_ts": [ - 17.2117, - 17.249, - 17.2981 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 418 - }, - { - "timestamp_utc": "2025-12-09T07:11:04.154707+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:05:58Z\",\n \"avg_ns\": 38088396475,\n \"stddev_ns\": 4019918701,\n \"avg_ts\": 13.549558,\n \"stddev_ts\": 1.522768,\n \"samples_ns\": [ 33447035511, 40464507904, 40353646012 ],\n \"samples_ts\": [ 15.3078, 12.6531, 12.6878 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:08:04Z\",\n \"avg_ns\": 59650427554,\n \"stddev_ns\": 958792515,\n \"avg_ts\": 11.873129,\n \"stddev_ts\": 6.553584,\n \"samples_ns\": [ 112213500317, 37008108076, 29729674270 ],\n \"samples_ts\": [ 4.56273, 13.8348, 17.2219 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T07:05:58Z", - "avg_ns": 38088396475, - "stddev_ns": 4019918701, - "avg_ts": 13.549558, - "stddev_ts": 1.522768, - "samples_ns": [ - 33447035511, - 40464507904, - 40353646012 - ], - "samples_ts": [ - 15.3078, - 12.6531, - 12.6878 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T07:08:04Z", - "avg_ns": 59650427554, - "stddev_ns": 958792515, - "avg_ts": 11.873129, - "stddev_ts": 6.553584, - "samples_ns": [ - 112213500317, - 37008108076, - 29729674270 - ], - "samples_ts": [ - 4.56273, - 13.8348, - 17.2219 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 419 - }, - { - "timestamp_utc": "2025-12-09T07:12:52.004956+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:11:05Z\",\n \"avg_ns\": 6446922013,\n \"stddev_ns\": 3429637782,\n \"avg_ts\": 23.591416,\n \"stddev_ts\": 10.868190,\n \"samples_ns\": [ 3750690866, 5283032706, 10307042467 ],\n \"samples_ts\": [ 34.127, 24.2285, 12.4187 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:11:27Z\",\n \"avg_ns\": 28018136236,\n \"stddev_ns\": 179749730,\n \"avg_ts\": 4.568595,\n \"stddev_ts\": 0.029264,\n \"samples_ns\": [ 27855622639, 27987581106, 28211204965 ],\n \"samples_ts\": [ 4.59512, 4.57346, 4.5372 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T07:11:05Z", - "avg_ns": 6446922013, - "stddev_ns": 3429637782, - "avg_ts": 23.591416, - "stddev_ts": 10.86819, - "samples_ns": [ - 3750690866, - 5283032706, - 10307042467 - ], - "samples_ts": [ - 34.127, - 24.2285, - 12.4187 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T07:11:27Z", - "avg_ns": 28018136236, - "stddev_ns": 179749730, - "avg_ts": 4.568595, - "stddev_ts": 0.029264, - "samples_ns": [ - 27855622639, - 27987581106, - 28211204965 - ], - "samples_ts": [ - 4.59512, - 4.57346, - 4.5372 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 420 - }, - { - "timestamp_utc": "2025-12-09T07:16:14.040939+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:12:53Z\",\n \"avg_ns\": 10262880463,\n \"stddev_ns\": 20786433,\n \"avg_ts\": 12.472166,\n \"stddev_ts\": 0.025272,\n \"samples_ns\": [ 10265941938, 10281965881, 10240733572 ],\n \"samples_ts\": [ 12.4684, 12.449, 12.4991 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:13:34Z\",\n \"avg_ns\": 52999698263,\n \"stddev_ns\": 4084973456,\n \"avg_ts\": 12.879699,\n \"stddev_ts\": 6.637390,\n \"samples_ns\": [ 97590075269, 29608953819, 31800065703 ],\n \"samples_ts\": [ 5.24644, 17.2921, 16.1006 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T07:12:53Z", - "avg_ns": 10262880463, - "stddev_ns": 20786433, - "avg_ts": 12.472166, - "stddev_ts": 0.025272, - "samples_ns": [ - 10265941938, - 10281965881, - 10240733572 - ], - "samples_ts": [ - 12.4684, - 12.449, - 12.4991 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T07:13:34Z", - "avg_ns": 52999698263, - "stddev_ns": 4084973456, - "avg_ts": 12.879699, - "stddev_ts": 6.63739, - "samples_ns": [ - 97590075269, - 29608953819, - 31800065703 - ], - "samples_ts": [ - 5.24644, - 17.2921, - 16.1006 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 421 - }, - { - "timestamp_utc": "2025-12-09T07:20:11.112482+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:16:14Z\",\n \"avg_ns\": 41041521055,\n \"stddev_ns\": 66534072,\n \"avg_ts\": 12.475193,\n \"stddev_ts\": 0.020207,\n \"samples_ns\": [ 41117604997, 40994250481, 41012707688 ],\n \"samples_ts\": [ 12.4521, 12.4896, 12.4839 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:18:57Z\",\n \"avg_ns\": 24442348395,\n \"stddev_ns\": 1590458200,\n \"avg_ts\": 5.521992,\n \"stddev_ts\": 1.665273,\n \"samples_ns\": [ 27962275121, 28171522695, 17193247371 ],\n \"samples_ts\": [ 4.5776, 4.5436, 7.44478 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T07:16:14Z", - "avg_ns": 41041521055, - "stddev_ns": 66534072, - "avg_ts": 12.475193, - "stddev_ts": 0.020207, - "samples_ns": [ - 41117604997, - 40994250481, - 41012707688 - ], - "samples_ts": [ - 12.4521, - 12.4896, - 12.4839 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T07:18:57Z", - "avg_ns": 24442348395, - "stddev_ns": 1590458200, - "avg_ts": 5.521992, - "stddev_ts": 1.665273, - "samples_ns": [ - 27962275121, - 28171522695, - 17193247371 - ], - "samples_ts": [ - 4.5776, - 4.5436, - 7.44478 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 422 - }, - { - "timestamp_utc": "2025-12-09T07:25:20.888171+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:20:12Z\",\n \"avg_ns\": 11529210758,\n \"stddev_ns\": 5568548,\n \"avg_ts\": 44.408944,\n \"stddev_ts\": 0.021451,\n \"samples_ns\": [ 11531543427, 11533232506, 11522856342 ],\n \"samples_ts\": [ 44.4, 44.3935, 44.4334 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:20:58Z\",\n \"avg_ns\": 87516916362,\n \"stddev_ns\": 650618825,\n \"avg_ts\": 6.080474,\n \"stddev_ts\": 1.353599,\n \"samples_ns\": [ 73847158363, 113281631123, 75421959601 ],\n \"samples_ts\": [ 6.93324, 4.51971, 6.78847 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T07:20:12Z", - "avg_ns": 11529210758, - "stddev_ns": 5568548, - "avg_ts": 44.408944, - "stddev_ts": 0.021451, - "samples_ns": [ - 11531543427, - 11533232506, - 11522856342 - ], - "samples_ts": [ - 44.4, - 44.3935, - 44.4334 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T07:20:58Z", - "avg_ns": 87516916362, - "stddev_ns": 650618825, - "avg_ts": 6.080474, - "stddev_ts": 1.353599, - "samples_ns": [ - 73847158363, - 113281631123, - 75421959601 - ], - "samples_ts": [ - 6.93324, - 4.51971, - 6.78847 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 423 - }, - { - "timestamp_utc": "2025-12-09T07:25:55.813401+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:25:21Z\",\n \"avg_ns\": 2872100797,\n \"stddev_ns\": 5148492,\n \"avg_ts\": 44.566777,\n \"stddev_ts\": 0.079962,\n \"samples_ns\": [ 2874718690, 2875413670, 2866170033 ],\n \"samples_ts\": [ 44.5261, 44.5153, 44.6589 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:25:33Z\",\n \"avg_ns\": 7465009040,\n \"stddev_ns\": 25670789,\n \"avg_ts\": 17.146799,\n \"stddev_ts\": 0.058853,\n \"samples_ns\": [ 7453004240, 7447540436, 7494482446 ],\n \"samples_ts\": [ 17.1743, 17.1869, 17.0792 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T07:25:21Z", - "avg_ns": 2872100797, - "stddev_ns": 5148492, - "avg_ts": 44.566777, - "stddev_ts": 0.079962, - "samples_ns": [ - 2874718690, - 2875413670, - 2866170033 - ], - "samples_ts": [ - 44.5261, - 44.5153, - 44.6589 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T07:25:33Z", - "avg_ns": 7465009040, - "stddev_ns": 25670789, - "avg_ts": 17.146799, - "stddev_ts": 0.058853, - "samples_ns": [ - 7453004240, - 7447540436, - 7494482446 - ], - "samples_ts": [ - 17.1743, - 17.1869, - 17.0792 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 424 - }, - { - "timestamp_utc": "2025-12-09T07:30:30.356553+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:25:56Z\",\n \"avg_ns\": 2968788854,\n \"stddev_ns\": 182195480,\n \"avg_ts\": 43.220038,\n \"stddev_ts\": 2.561822,\n \"samples_ns\": [ 2867103987, 2860131024, 3179131551 ],\n \"samples_ts\": [ 44.6444, 44.7532, 40.2626 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:26:08Z\",\n \"avg_ns\": 87233530937,\n \"stddev_ns\": 4045681622,\n \"avg_ts\": 6.728949,\n \"stddev_ts\": 3.318205,\n \"samples_ns\": [ 100462673056, 112693997233, 48543922524 ],\n \"samples_ts\": [ 5.09642, 4.54328, 10.5471 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T07:25:56Z", - "avg_ns": 2968788854, - "stddev_ns": 182195480, - "avg_ts": 43.220038, - "stddev_ts": 2.561822, - "samples_ns": [ - 2867103987, - 2860131024, - 3179131551 - ], - "samples_ts": [ - 44.6444, - 44.7532, - 40.2626 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T07:26:08Z", - "avg_ns": 87233530937, - "stddev_ns": 4045681622, - "avg_ts": 6.728949, - "stddev_ts": 3.318205, - "samples_ns": [ - 100462673056, - 112693997233, - 48543922524 - ], - "samples_ts": [ - 5.09642, - 4.54328, - 10.5471 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 425 - }, - { - "timestamp_utc": "2025-12-09T07:32:53.483877+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:30:31Z\",\n \"avg_ns\": 15665930528,\n \"stddev_ns\": 3865289541,\n \"avg_ts\": 36.774562,\n \"stddev_ts\": 13.356566,\n \"samples_ns\": [ 11516504721, 11502001713, 23979285150 ],\n \"samples_ts\": [ 44.4579, 44.514, 21.3518 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:31:29Z\",\n \"avg_ns\": 27759704967,\n \"stddev_ns\": 372362596,\n \"avg_ts\": 4.611551,\n \"stddev_ts\": 0.061571,\n \"samples_ns\": [ 27666457364, 28169829204, 27442828333 ],\n \"samples_ts\": [ 4.62654, 4.54387, 4.66424 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T07:30:31Z", - "avg_ns": 15665930528, - "stddev_ns": 3865289541, - "avg_ts": 36.774562, - "stddev_ts": 13.356566, - "samples_ns": [ - 11516504721, - 11502001713, - 23979285150 - ], - "samples_ts": [ - 44.4579, - 44.514, - 21.3518 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T07:31:29Z", - "avg_ns": 27759704967, - "stddev_ns": 372362596, - "avg_ts": 4.611551, - "stddev_ts": 0.061571, - "samples_ns": [ - 27666457364, - 28169829204, - 27442828333 - ], - "samples_ts": [ - 4.62654, - 4.54387, - 4.66424 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 426 - }, - { - "timestamp_utc": "2025-12-09T07:38:14.059040+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:32:55Z\",\n \"avg_ns\": 33340479026,\n \"stddev_ns\": 4173451634,\n \"avg_ts\": 17.434528,\n \"stddev_ts\": 8.325906,\n \"samples_ns\": [ 40554380643, 40538064147, 18928992290 ],\n \"samples_ts\": [ 12.625, 12.6301, 27.0485 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:35:15Z\",\n \"avg_ns\": 59356136508,\n \"stddev_ns\": 3201715394,\n \"avg_ts\": 11.849597,\n \"stddev_ts\": 6.520323,\n \"samples_ns\": [ 29643758975, 37475372768, 110949277781 ],\n \"samples_ts\": [ 17.2718, 13.6623, 4.61472 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T07:32:55Z", - "avg_ns": 33340479026, - "stddev_ns": 4173451634, - "avg_ts": 17.434528, - "stddev_ts": 8.325906, - "samples_ns": [ - 40554380643, - 40538064147, - 18928992290 - ], - "samples_ts": [ - 12.625, - 12.6301, - 27.0485 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T07:35:15Z", - "avg_ns": 59356136508, - "stddev_ns": 3201715394, - "avg_ts": 11.849597, - "stddev_ts": 6.520323, - "samples_ns": [ - 29643758975, - 37475372768, - 110949277781 - ], - "samples_ts": [ - 17.2718, - 13.6623, - 4.61472 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 427 - }, - { - "timestamp_utc": "2025-12-09T07:40:10.272586+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:38:15Z\",\n \"avg_ns\": 10130307359,\n \"stddev_ns\": 23698341,\n \"avg_ts\": 12.635398,\n \"stddev_ts\": 0.029581,\n \"samples_ns\": [ 10104326484, 10135858934, 10150736661 ],\n \"samples_ts\": [ 12.6678, 12.6284, 12.6099 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:38:56Z\",\n \"avg_ns\": 24561605047,\n \"stddev_ns\": 2093170272,\n \"avg_ts\": 5.363725,\n \"stddev_ts\": 1.174763,\n \"samples_ns\": [ 27491420485, 27145183708, 19048210949 ],\n \"samples_ts\": [ 4.656, 4.71539, 6.71979 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T07:38:15Z", - "avg_ns": 10130307359, - "stddev_ns": 23698341, - "avg_ts": 12.635398, - "stddev_ts": 0.029581, - "samples_ns": [ - 10104326484, - 10135858934, - 10150736661 - ], - "samples_ts": [ - 12.6678, - 12.6284, - 12.6099 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T07:38:56Z", - "avg_ns": 24561605047, - "stddev_ns": 2093170272, - "avg_ts": 5.363725, - "stddev_ts": 1.174763, - "samples_ns": [ - 27491420485, - 27145183708, - 19048210949 - ], - "samples_ts": [ - 4.656, - 4.71539, - 6.71979 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 428 - }, - { - "timestamp_utc": "2025-12-09T07:43:31.512281+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:40:11Z\",\n \"avg_ns\": 2875457047,\n \"stddev_ns\": 6773196,\n \"avg_ts\": 44.514827,\n \"stddev_ts\": 0.104795,\n \"samples_ns\": [ 2882707851, 2869293398, 2874369893 ],\n \"samples_ts\": [ 44.4027, 44.6103, 44.5315 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:40:22Z\",\n \"avg_ns\": 62867909103,\n \"stddev_ns\": 3739027780,\n \"avg_ts\": 10.848834,\n \"stddev_ts\": 6.582534,\n \"samples_ns\": [ 28773166347, 50945603734, 108884957228 ],\n \"samples_ts\": [ 17.7944, 10.0499, 4.70221 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T07:40:11Z", - "avg_ns": 2875457047, - "stddev_ns": 6773196, - "avg_ts": 44.514827, - "stddev_ts": 0.104795, - "samples_ns": [ - 2882707851, - 2869293398, - 2874369893 - ], - "samples_ts": [ - 44.4027, - 44.6103, - 44.5315 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T07:40:22Z", - "avg_ns": 62867909103, - "stddev_ns": 3739027780, - "avg_ts": 10.848834, - "stddev_ts": 6.582534, - "samples_ns": [ - 28773166347, - 50945603734, - 108884957228 - ], - "samples_ts": [ - 17.7944, - 10.0499, - 4.70221 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 429 - }, - { - "timestamp_utc": "2025-12-09T07:45:56.229309+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:43:33Z\",\n \"avg_ns\": 26843806232,\n \"stddev_ns\": 4270019504,\n \"avg_ts\": 24.778073,\n \"stddev_ts\": 16.430543,\n \"samples_ns\": [ 40840002468, 27908198271, 11783217957 ],\n \"samples_ts\": [ 12.5367, 18.3459, 43.4516 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:45:34Z\",\n \"avg_ns\": 7160232695,\n \"stddev_ns\": 22363831,\n \"avg_ts\": 17.876630,\n \"stddev_ts\": 0.055734,\n \"samples_ns\": [ 7147118711, 7147524399, 7186054976 ],\n \"samples_ts\": [ 17.9093, 17.9083, 17.8123 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T07:43:33Z", - "avg_ns": 26843806232, - "stddev_ns": 4270019504, - "avg_ts": 24.778073, - "stddev_ts": 16.430543, - "samples_ns": [ - 40840002468, - 27908198271, - 11783217957 - ], - "samples_ts": [ - 12.5367, - 18.3459, - 43.4516 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T07:45:34Z", - "avg_ns": 7160232695, - "stddev_ns": 22363831, - "avg_ts": 17.87663, - "stddev_ts": 0.055734, - "samples_ns": [ - 7147118711, - 7147524399, - 7186054976 - ], - "samples_ts": [ - 17.9093, - 17.9083, - 17.8123 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 430 - }, - { - "timestamp_utc": "2025-12-09T07:51:00.870705+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:45:57Z\",\n \"avg_ns\": 37311979205,\n \"stddev_ns\": 1445339747,\n \"avg_ts\": 13.939086,\n \"stddev_ts\": 2.225835,\n \"samples_ns\": [ 31012929432, 40486040246, 40436967938 ],\n \"samples_ts\": [ 16.5092, 12.6463, 12.6617 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:48:01Z\",\n \"avg_ns\": 59779969672,\n \"stddev_ns\": 4126704634,\n \"avg_ts\": 11.663083,\n \"stddev_ts\": 6.598231,\n \"samples_ns\": [ 109738978999, 40791625077, 28809304941 ],\n \"samples_ts\": [ 4.66562, 12.5516, 17.772 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T07:45:57Z", - "avg_ns": 37311979205, - "stddev_ns": 1445339747, - "avg_ts": 13.939086, - "stddev_ts": 2.225835, - "samples_ns": [ - 31012929432, - 40486040246, - 40436967938 - ], - "samples_ts": [ - 16.5092, - 12.6463, - 12.6617 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_type": "gemma3 1B Q2_K - Medium", - "model_size": 683281408, - "model_n_params": 999885952, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T07:48:01Z", - "avg_ns": 59779969672, - "stddev_ns": 4126704634, - "avg_ts": 11.663083, - "stddev_ts": 6.598231, - "samples_ns": [ - 109738978999, - 40791625077, - 28809304941 - ], - "samples_ts": [ - 4.66562, - 12.5516, - 17.772 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-1B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 431 - }, - { - "timestamp_utc": "2025-12-09T07:56:27.677903+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:51:25Z\",\n \"avg_ns\": 33231225053,\n \"stddev_ns\": 35483146,\n \"avg_ts\": 3.851802,\n \"stddev_ts\": 0.004112,\n \"samples_ns\": [ 33269086727, 33198734810, 33225853624 ],\n \"samples_ts\": [ 3.84742, 3.85557, 3.85242 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:53:29Z\",\n \"avg_ns\": 58537644411,\n \"stddev_ns\": 4225363977,\n \"avg_ts\": 2.319446,\n \"stddev_ts\": 0.645973,\n \"samples_ns\": [ 79035893301, 52327665202, 44249374732 ],\n \"samples_ts\": [ 1.61952, 2.44612, 2.8927 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T07:51:25Z", - "avg_ns": 33231225053, - "stddev_ns": 35483146, - "avg_ts": 3.851802, - "stddev_ts": 0.004112, - "samples_ns": [ - 33269086727, - 33198734810, - 33225853624 - ], - "samples_ts": [ - 3.84742, - 3.85557, - 3.85242 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T07:53:29Z", - "avg_ns": 58537644411, - "stddev_ns": 4225363977, - "avg_ts": 2.319446, - "stddev_ts": 0.645973, - "samples_ns": [ - 79035893301, - 52327665202, - 44249374732 - ], - "samples_ts": [ - 1.61952, - 2.44612, - 2.8927 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 432 - }, - { - "timestamp_utc": "2025-12-09T08:10:26.597412+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:56:31Z\",\n \"avg_ns\": 26175080219,\n \"stddev_ns\": 3963803463,\n \"avg_ts\": 5.175508,\n \"stddev_ts\": 1.560887,\n \"samples_ns\": [ 18523014597, 27053292677, 32948933383 ],\n \"samples_ts\": [ 6.91032, 4.7314, 3.8848 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:58:08Z\",\n \"avg_ns\": 245758125543,\n \"stddev_ns\": 1243743502,\n \"avg_ts\": 2.087778,\n \"stddev_ts\": 0.119696,\n \"samples_ns\": [ 230011033588, 253441514068, 253821828973 ],\n \"samples_ts\": [ 2.22598, 2.02019, 2.01716 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T07:56:31Z", - "avg_ns": 26175080219, - "stddev_ns": 3963803463, - "avg_ts": 5.175508, - "stddev_ts": 1.560887, - "samples_ns": [ - 18523014597, - 27053292677, - 32948933383 - ], - "samples_ts": [ - 6.91032, - 4.7314, - 3.8848 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T07:58:08Z", - "avg_ns": 245758125543, - "stddev_ns": 1243743502, - "avg_ts": 2.087778, - "stddev_ts": 0.119696, - "samples_ns": [ - 230011033588, - 253441514068, - 253821828973 - ], - "samples_ts": [ - 2.22598, - 2.02019, - 2.01716 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 433 - }, - { - "timestamp_utc": "2025-12-09T08:20:14.657767+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:10:29Z\",\n \"avg_ns\": 100346627523,\n \"stddev_ns\": 4249783711,\n \"avg_ts\": 5.306378,\n \"stddev_ts\": 1.364666,\n \"samples_ns\": [ 111730946325, 114895449903, 74413486343 ],\n \"samples_ts\": [ 4.58244, 4.45623, 6.88047 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:16:45Z\",\n \"avg_ns\": 69600215899,\n \"stddev_ns\": 4212784660,\n \"avg_ts\": 1.886254,\n \"stddev_ts\": 0.383697,\n \"samples_ns\": [ 55068956520, 79502772352, 74228918826 ],\n \"samples_ts\": [ 2.32436, 1.61001, 1.7244 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T08:10:29Z", - "avg_ns": 100346627523, - "stddev_ns": 4249783711, - "avg_ts": 5.306378, - "stddev_ts": 1.364666, - "samples_ns": [ - 111730946325, - 114895449903, - 74413486343 - ], - "samples_ts": [ - 4.58244, - 4.45623, - 6.88047 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T08:16:45Z", - "avg_ns": 69600215899, - "stddev_ns": 4212784660, - "avg_ts": 1.886254, - "stddev_ts": 0.383697, - "samples_ns": [ - 55068956520, - 79502772352, - 74228918826 - ], - "samples_ts": [ - 2.32436, - 1.61001, - 1.7244 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 434 - }, - { - "timestamp_utc": "2025-12-09T08:38:27.361398+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:20:17Z\",\n \"avg_ns\": 99400753159,\n \"stddev_ns\": 4128133897,\n \"avg_ts\": 5.394805,\n \"stddev_ts\": 1.419341,\n \"samples_ns\": [ 97723424577, 126119734887, 74359100014 ],\n \"samples_ts\": [ 5.23928, 4.05963, 6.88551 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:26:30Z\",\n \"avg_ns\": 238719141079,\n \"stddev_ns\": 3587034107,\n \"avg_ts\": 2.154808,\n \"stddev_ts\": 0.183824,\n \"samples_ns\": [ 253533756937, 246049440238, 216574226063 ],\n \"samples_ts\": [ 2.01945, 2.08088, 2.36409 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T08:20:17Z", - "avg_ns": 99400753159, - "stddev_ns": 4128133897, - "avg_ts": 5.394805, - "stddev_ts": 1.419341, - "samples_ns": [ - 97723424577, - 126119734887, - 74359100014 - ], - "samples_ts": [ - 5.23928, - 4.05963, - 6.88551 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T08:26:30Z", - "avg_ns": 238719141079, - "stddev_ns": 3587034107, - "avg_ts": 2.154808, - "stddev_ts": 0.183824, - "samples_ns": [ - 253533756937, - 246049440238, - 216574226063 - ], - "samples_ts": [ - 2.01945, - 2.08088, - 2.36409 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 435 - }, - { - "timestamp_utc": "2025-12-09T08:43:06.628728+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:38:32Z\",\n \"avg_ns\": 27551119882,\n \"stddev_ns\": 3884969453,\n \"avg_ts\": 4.959695,\n \"stddev_ts\": 1.661153,\n \"samples_ns\": [ 33262708490, 30756939597, 18633711559 ],\n \"samples_ts\": [ 3.84815, 4.16166, 6.86927 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:40:29Z\",\n \"avg_ns\": 52274384428,\n \"stddev_ns\": 4022060820,\n \"avg_ts\": 2.556845,\n \"stddev_ts\": 0.599107,\n \"samples_ns\": [ 44107785991, 44084769717, 68630597578 ],\n \"samples_ts\": [ 2.90198, 2.9035, 1.86506 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T08:38:32Z", - "avg_ns": 27551119882, - "stddev_ns": 3884969453, - "avg_ts": 4.959695, - "stddev_ts": 1.661153, - "samples_ns": [ - 33262708490, - 30756939597, - 18633711559 - ], - "samples_ts": [ - 3.84815, - 4.16166, - 6.86927 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T08:40:29Z", - "avg_ns": 52274384428, - "stddev_ns": 4022060820, - "avg_ts": 2.556845, - "stddev_ts": 0.599107, - "samples_ns": [ - 44107785991, - 44084769717, - 68630597578 - ], - "samples_ts": [ - 2.90198, - 2.9035, - 1.86506 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 436 - }, - { - "timestamp_utc": "2025-12-09T08:57:01.375475+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:43:12Z\",\n \"avg_ns\": 30660076647,\n \"stddev_ns\": 3613019620,\n \"avg_ts\": 4.248473,\n \"stddev_ts\": 0.717769,\n \"samples_ns\": [ 33317550893, 33452098652, 25210580396 ],\n \"samples_ts\": [ 3.84182, 3.82637, 5.07723 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:45:17Z\",\n \"avg_ns\": 234335529756,\n \"stddev_ns\": 1726385990,\n \"avg_ts\": 2.185716,\n \"stddev_ts\": 0.051726,\n \"samples_ns\": [ 234556609349, 228692009992, 239757969928 ],\n \"samples_ts\": [ 2.18284, 2.23882, 2.13549 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T08:43:12Z", - "avg_ns": 30660076647, - "stddev_ns": 3613019620, - "avg_ts": 4.248473, - "stddev_ts": 0.717769, - "samples_ns": [ - 33317550893, - 33452098652, - 25210580396 - ], - "samples_ts": [ - 3.84182, - 3.82637, - 5.07723 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T08:45:17Z", - "avg_ns": 234335529756, - "stddev_ns": 1726385990, - "avg_ts": 2.185716, - "stddev_ts": 0.051726, - "samples_ns": [ - 234556609349, - 228692009992, - 239757969928 - ], - "samples_ts": [ - 2.18284, - 2.23882, - 2.13549 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 437 - }, - { - "timestamp_utc": "2025-12-09T09:06:48.629051+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:57:04Z\",\n \"avg_ns\": 99233535958,\n \"stddev_ns\": 1756507456,\n \"avg_ts\": 5.380567,\n \"stddev_ts\": 1.373894,\n \"samples_ns\": [ 100436853407, 74484997992, 122778756475 ],\n \"samples_ts\": [ 5.09773, 6.87387, 4.1701 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:04:06Z\",\n \"avg_ns\": 53896421188,\n \"stddev_ns\": 887785056,\n \"avg_ts\": 2.492970,\n \"stddev_ts\": 0.615253,\n \"samples_ns\": [ 71806959988, 44868452506, 45013851070 ],\n \"samples_ts\": [ 1.78256, 2.85278, 2.84357 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T08:57:04Z", - "avg_ns": 99233535958, - "stddev_ns": 1756507456, - "avg_ts": 5.380567, - "stddev_ts": 1.373894, - "samples_ns": [ - 100436853407, - 74484997992, - 122778756475 - ], - "samples_ts": [ - 5.09773, - 6.87387, - 4.1701 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T09:04:06Z", - "avg_ns": 53896421188, - "stddev_ns": 887785056, - "avg_ts": 2.49297, - "stddev_ts": 0.615253, - "samples_ns": [ - 71806959988, - 44868452506, - 45013851070 - ], - "samples_ts": [ - 1.78256, - 2.85278, - 2.84357 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 438 - }, - { - "timestamp_utc": "2025-12-09T09:26:03.594188+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:06:51Z\",\n \"avg_ns\": 99278268550,\n \"stddev_ns\": 3399112710,\n \"avg_ts\": 5.348635,\n \"stddev_ts\": 1.324222,\n \"samples_ns\": [ 110547902109, 74453722317, 112833181225 ],\n \"samples_ts\": [ 4.63148, 6.87675, 4.53767 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:13:43Z\",\n \"avg_ns\": 246362558910,\n \"stddev_ns\": 3772086854,\n \"avg_ts\": 2.085125,\n \"stddev_ts\": 0.149250,\n \"samples_ns\": [ 227121168201, 252342311734, 259624196797 ],\n \"samples_ts\": [ 2.2543, 2.02899, 1.97208 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T09:06:51Z", - "avg_ns": 99278268550, - "stddev_ns": 3399112710, - "avg_ts": 5.348635, - "stddev_ts": 1.324222, - "samples_ns": [ - 110547902109, - 74453722317, - 112833181225 - ], - "samples_ts": [ - 4.63148, - 6.87675, - 4.53767 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T09:13:43Z", - "avg_ns": 246362558910, - "stddev_ns": 3772086854, - "avg_ts": 2.085125, - "stddev_ts": 0.14925, - "samples_ns": [ - 227121168201, - 252342311734, - 259624196797 - ], - "samples_ts": [ - 2.2543, - 2.02899, - 1.97208 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 439 - }, - { - "timestamp_utc": "2025-12-09T09:30:54.427184+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:26:06Z\",\n \"avg_ns\": 19767831413,\n \"stddev_ns\": 3660627824,\n \"avg_ts\": 6.519004,\n \"stddev_ts\": 0.636029,\n \"samples_ns\": [ 18569897485, 18605963415, 22127633339 ],\n \"samples_ts\": [ 6.89288, 6.87951, 5.78462 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:27:24Z\",\n \"avg_ns\": 69596397591,\n \"stddev_ns\": 3431059687,\n \"avg_ts\": 1.979469,\n \"stddev_ts\": 0.708942,\n \"samples_ns\": [ 81985961804, 81056584649, 45746646320 ],\n \"samples_ts\": [ 1.56124, 1.57914, 2.79802 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T09:26:06Z", - "avg_ns": 19767831413, - "stddev_ns": 3660627824, - "avg_ts": 6.519004, - "stddev_ts": 0.636029, - "samples_ns": [ - 18569897485, - 18605963415, - 22127633339 - ], - "samples_ts": [ - 6.89288, - 6.87951, - 5.78462 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T09:27:24Z", - "avg_ns": 69596397591, - "stddev_ns": 3431059687, - "avg_ts": 1.979469, - "stddev_ts": 0.708942, - "samples_ns": [ - 81985961804, - 81056584649, - 45746646320 - ], - "samples_ts": [ - 1.56124, - 1.57914, - 2.79802 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 440 - }, - { - "timestamp_utc": "2025-12-09T09:45:14.399236+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:30:57Z\",\n \"avg_ns\": 18561843303,\n \"stddev_ns\": 47173261,\n \"avg_ts\": 6.895897,\n \"stddev_ts\": 0.017500,\n \"samples_ns\": [ 18538854457, 18530571921, 18616103533 ],\n \"samples_ts\": [ 6.90442, 6.9075, 6.87577 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:32:11Z\",\n \"avg_ns\": 260643395267,\n \"stddev_ns\": 1369070592,\n \"avg_ts\": 1.964406,\n \"stddev_ts\": 0.010288,\n \"samples_ns\": [ 259770938534, 259937928200, 262221319068 ],\n \"samples_ts\": [ 1.97097, 1.9697, 1.95255 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T09:30:57Z", - "avg_ns": 18561843303, - "stddev_ns": 47173261, - "avg_ts": 6.895897, - "stddev_ts": 0.0175, - "samples_ns": [ - 18538854457, - 18530571921, - 18616103533 - ], - "samples_ts": [ - 6.90442, - 6.9075, - 6.87577 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T09:32:11Z", - "avg_ns": 260643395267, - "stddev_ns": 1369070592, - "avg_ts": 1.964406, - "stddev_ts": 0.010288, - "samples_ns": [ - 259770938534, - 259937928200, - 262221319068 - ], - "samples_ts": [ - 1.97097, - 1.9697, - 1.95255 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 441 - }, - { - "timestamp_utc": "2025-12-09T09:55:03.364568+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:45:17Z\",\n \"avg_ns\": 100535311722,\n \"stddev_ns\": 4118428610,\n \"avg_ts\": 5.320161,\n \"stddev_ts\": 1.375271,\n \"samples_ns\": [ 100492763452, 125754324304, 75358847411 ],\n \"samples_ts\": [ 5.09489, 4.07143, 6.79416 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:51:34Z\",\n \"avg_ns\": 69282929863,\n \"stddev_ns\": 4214010172,\n \"avg_ts\": 1.967262,\n \"stddev_ts\": 0.648603,\n \"samples_ns\": [ 47124613571, 80309330282, 80414845737 ],\n \"samples_ts\": [ 2.7162, 1.59384, 1.59175 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T09:45:17Z", - "avg_ns": 100535311722, - "stddev_ns": 4118428610, - "avg_ts": 5.320161, - "stddev_ts": 1.375271, - "samples_ns": [ - 100492763452, - 125754324304, - 75358847411 - ], - "samples_ts": [ - 5.09489, - 4.07143, - 6.79416 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T09:51:34Z", - "avg_ns": 69282929863, - "stddev_ns": 4214010172, - "avg_ts": 1.967262, - "stddev_ts": 0.648603, - "samples_ns": [ - 47124613571, - 80309330282, - 80414845737 - ], - "samples_ts": [ - 2.7162, - 1.59384, - 1.59175 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 442 - }, - { - "timestamp_utc": "2025-12-09T10:13:36.883048+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:55:08Z\",\n \"avg_ns\": 100551874124,\n \"stddev_ns\": 3988984286,\n \"avg_ts\": 5.380466,\n \"stddev_ts\": 1.481127,\n \"samples_ns\": [ 92985289896, 133317386120, 75352946358 ],\n \"samples_ts\": [ 5.50625, 3.84046, 6.79469 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:01:25Z\",\n \"avg_ns\": 243528990204,\n \"stddev_ns\": 2017275858,\n \"avg_ts\": 2.110244,\n \"stddev_ts\": 0.160407,\n \"samples_ns\": [ 256768273055, 250555047474, 223263650085 ],\n \"samples_ts\": [ 1.99402, 2.04346, 2.29325 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T09:55:08Z", - "avg_ns": 100551874124, - "stddev_ns": 3988984286, - "avg_ts": 5.380466, - "stddev_ts": 1.481127, - "samples_ns": [ - 92985289896, - 133317386120, - 75352946358 - ], - "samples_ts": [ - 5.50625, - 3.84046, - 6.79469 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T10:01:25Z", - "avg_ns": 243528990204, - "stddev_ns": 2017275858, - "avg_ts": 2.110244, - "stddev_ts": 0.160407, - "samples_ns": [ - 256768273055, - 250555047474, - 223263650085 - ], - "samples_ts": [ - 1.99402, - 2.04346, - 2.29325 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 443 - }, - { - "timestamp_utc": "2025-12-09T10:18:26.852720+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:13:42Z\",\n \"avg_ns\": 26230556441,\n \"stddev_ns\": 4271293362,\n \"avg_ts\": 5.171493,\n \"stddev_ts\": 1.564468,\n \"samples_ns\": [ 33402265682, 26714391619, 18575012022 ],\n \"samples_ts\": [ 3.83208, 4.79142, 6.89098 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:15:34Z\",\n \"avg_ns\": 57139951745,\n \"stddev_ns\": 3298501493,\n \"avg_ts\": 2.391619,\n \"stddev_ts\": 0.674545,\n \"samples_ns\": [ 45992187165, 46058890924, 79368777148 ],\n \"samples_ts\": [ 2.78308, 2.77905, 1.61272 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T10:13:42Z", - "avg_ns": 26230556441, - "stddev_ns": 4271293362, - "avg_ts": 5.171493, - "stddev_ts": 1.564468, - "samples_ns": [ - 33402265682, - 26714391619, - 18575012022 - ], - "samples_ts": [ - 3.83208, - 4.79142, - 6.89098 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T10:15:34Z", - "avg_ns": 57139951745, - "stddev_ns": 3298501493, - "avg_ts": 2.391619, - "stddev_ts": 0.674545, - "samples_ns": [ - 45992187165, - 46058890924, - 79368777148 - ], - "samples_ts": [ - 2.78308, - 2.77905, - 1.61272 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 444 - }, - { - "timestamp_utc": "2025-12-09T10:32:21.345518+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:18:32Z\",\n \"avg_ns\": 27650251250,\n \"stddev_ns\": 4244281591,\n \"avg_ts\": 4.957646,\n \"stddev_ts\": 1.704023,\n \"samples_ns\": [ 33390673821, 31058291943, 18501787988 ],\n \"samples_ts\": [ 3.83341, 4.12128, 6.91825 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:20:29Z\",\n \"avg_ns\": 237171088657,\n \"stddev_ns\": 3568454228,\n \"avg_ts\": 2.161403,\n \"stddev_ts\": 0.091098,\n \"samples_ns\": [ 249000857036, 231156100549, 231356308387 ],\n \"samples_ts\": [ 2.05622, 2.21495, 2.21304 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T10:18:32Z", - "avg_ns": 27650251250, - "stddev_ns": 4244281591, - "avg_ts": 4.957646, - "stddev_ts": 1.704023, - "samples_ns": [ - 33390673821, - 31058291943, - 18501787988 - ], - "samples_ts": [ - 3.83341, - 4.12128, - 6.91825 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T10:20:29Z", - "avg_ns": 237171088657, - "stddev_ns": 3568454228, - "avg_ts": 2.161403, - "stddev_ts": 0.091098, - "samples_ns": [ - 249000857036, - 231156100549, - 231356308387 - ], - "samples_ts": [ - 2.05622, - 2.21495, - 2.21304 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 445 - }, - { - "timestamp_utc": "2025-12-09T10:42:15.702753+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:32:27Z\",\n \"avg_ns\": 101980083885,\n \"stddev_ns\": 288975309,\n \"avg_ts\": 5.283309,\n \"stddev_ts\": 1.337800,\n \"samples_ns\": [ 86691760413, 82630277033, 136618214211 ],\n \"samples_ts\": [ 5.90598, 6.19628, 3.74767 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:39:48Z\",\n \"avg_ns\": 48715524111,\n \"stddev_ns\": 3335483261,\n \"avg_ts\": 2.641840,\n \"stddev_ts\": 0.232317,\n \"samples_ns\": [ 53920050259, 45930227664, 46296294412 ],\n \"samples_ts\": [ 2.37389, 2.78684, 2.7648 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T10:32:27Z", - "avg_ns": 101980083885, - "stddev_ns": 288975309, - "avg_ts": 5.283309, - "stddev_ts": 1.3378, - "samples_ns": [ - 86691760413, - 82630277033, - 136618214211 - ], - "samples_ts": [ - 5.90598, - 6.19628, - 3.74767 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T10:39:48Z", - "avg_ns": 48715524111, - "stddev_ns": 3335483261, - "avg_ts": 2.64184, - "stddev_ts": 0.232317, - "samples_ns": [ - 53920050259, - 45930227664, - 46296294412 - ], - "samples_ts": [ - 2.37389, - 2.78684, - 2.7648 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 446 - }, - { - "timestamp_utc": "2025-12-09T11:01:41.975672+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:42:20Z\",\n \"avg_ns\": 101830406116,\n \"stddev_ns\": 3963502593,\n \"avg_ts\": 5.299406,\n \"stddev_ts\": 1.380421,\n \"samples_ns\": [ 89609258168, 79685731069, 136196229113 ],\n \"samples_ts\": [ 5.7137, 6.42524, 3.75928 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:49:42Z\",\n \"avg_ns\": 239630308475,\n \"stddev_ns\": 4278317820,\n \"avg_ts\": 2.143241,\n \"stddev_ts\": 0.143019,\n \"samples_ns\": [ 229709674586, 230351798657, 258829452183 ],\n \"samples_ts\": [ 2.2289, 2.22269, 1.97814 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T10:42:20Z", - "avg_ns": 101830406116, - "stddev_ns": 3963502593, - "avg_ts": 5.299406, - "stddev_ts": 1.380421, - "samples_ns": [ - 89609258168, - 79685731069, - 136196229113 - ], - "samples_ts": [ - 5.7137, - 6.42524, - 3.75928 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T10:49:42Z", - "avg_ns": 239630308475, - "stddev_ns": 4278317820, - "avg_ts": 2.143241, - "stddev_ts": 0.143019, - "samples_ns": [ - 229709674586, - 230351798657, - 258829452183 - ], - "samples_ts": [ - 2.2289, - 2.22269, - 1.97814 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 447 - }, - { - "timestamp_utc": "2025-12-09T11:06:33.138503+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:01:45Z\",\n \"avg_ns\": 29820411549,\n \"stddev_ns\": 845953741,\n \"avg_ts\": 4.434220,\n \"stddev_ts\": 1.034700,\n \"samples_ns\": [ 22739606347, 33284826556, 33436801745 ],\n \"samples_ts\": [ 5.62895, 3.8456, 3.82812 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:03:33Z\",\n \"avg_ns\": 59688609991,\n \"stddev_ns\": 1492539524,\n \"avg_ts\": 2.286738,\n \"stddev_ts\": 0.647674,\n \"samples_ns\": [ 82303451396, 50838018730, 45924359848 ],\n \"samples_ts\": [ 1.55522, 2.5178, 2.78719 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T11:01:45Z", - "avg_ns": 29820411549, - "stddev_ns": 845953741, - "avg_ts": 4.43422, - "stddev_ts": 1.0347, - "samples_ns": [ - 22739606347, - 33284826556, - 33436801745 - ], - "samples_ts": [ - 5.62895, - 3.8456, - 3.82812 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T11:03:33Z", - "avg_ns": 59688609991, - "stddev_ns": 1492539524, - "avg_ts": 2.286738, - "stddev_ts": 0.647674, - "samples_ns": [ - 82303451396, - 50838018730, - 45924359848 - ], - "samples_ts": [ - 1.55522, - 2.5178, - 2.78719 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 448 - }, - { - "timestamp_utc": "2025-12-09T11:20:54.826221+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:06:36Z\",\n \"avg_ns\": 27544660630,\n \"stddev_ns\": 2291756786,\n \"avg_ts\": 4.955513,\n \"stddev_ts\": 1.642299,\n \"samples_ns\": [ 18712754083, 30514774557, 33406453251 ],\n \"samples_ts\": [ 6.84025, 4.19469, 3.8316 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:08:17Z\",\n \"avg_ns\": 252108002803,\n \"stddev_ns\": 713155459,\n \"avg_ts\": 2.036612,\n \"stddev_ts\": 0.134858,\n \"samples_ns\": [ 233560812487, 262255843783, 260507352141 ],\n \"samples_ts\": [ 2.19215, 1.95229, 1.9654 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T11:06:36Z", - "avg_ns": 27544660630, - "stddev_ns": 2291756786, - "avg_ts": 4.955513, - "stddev_ts": 1.642299, - "samples_ns": [ - 18712754083, - 30514774557, - 33406453251 - ], - "samples_ts": [ - 6.84025, - 4.19469, - 3.8316 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T11:08:17Z", - "avg_ns": 252108002803, - "stddev_ns": 713155459, - "avg_ts": 2.036612, - "stddev_ts": 0.134858, - "samples_ns": [ - 233560812487, - 262255843783, - 260507352141 - ], - "samples_ts": [ - 2.19215, - 1.95229, - 1.9654 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 449 - }, - { - "timestamp_utc": "2025-12-09T11:30:43.565863+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:20:58Z\",\n \"avg_ns\": 100016043749,\n \"stddev_ns\": 3523753318,\n \"avg_ts\": 5.391614,\n \"stddev_ts\": 1.456849,\n \"samples_ns\": [ 130872583283, 94155025862, 75020522103 ],\n \"samples_ts\": [ 3.9122, 5.43784, 6.8248 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:27:13Z\",\n \"avg_ns\": 69950580474,\n \"stddev_ns\": 3863114980,\n \"avg_ts\": 1.902530,\n \"stddev_ts\": 0.486931,\n \"samples_ns\": [ 77276643642, 80614256712, 51960841070 ],\n \"samples_ts\": [ 1.65639, 1.58781, 2.46339 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T11:20:58Z", - "avg_ns": 100016043749, - "stddev_ns": 3523753318, - "avg_ts": 5.391614, - "stddev_ts": 1.456849, - "samples_ns": [ - 130872583283, - 94155025862, - 75020522103 - ], - "samples_ts": [ - 3.9122, - 5.43784, - 6.8248 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T11:27:13Z", - "avg_ns": 69950580474, - "stddev_ns": 3863114980, - "avg_ts": 1.90253, - "stddev_ts": 0.486931, - "samples_ns": [ - 77276643642, - 80614256712, - 51960841070 - ], - "samples_ts": [ - 1.65639, - 1.58781, - 2.46339 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 450 - }, - { - "timestamp_utc": "2025-12-09T11:49:42.405054+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:30:46Z\",\n \"avg_ns\": 100109602288,\n \"stddev_ns\": 513119417,\n \"avg_ts\": 5.330219,\n \"stddev_ts\": 1.365568,\n \"samples_ns\": [ 122409789617, 103071155996, 74847861252 ],\n \"samples_ts\": [ 4.18267, 4.96744, 6.84054 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:37:01Z\",\n \"avg_ns\": 253211709705,\n \"stddev_ns\": 2395102974,\n \"avg_ts\": 2.022934,\n \"stddev_ts\": 0.052950,\n \"samples_ns\": [ 257055313138, 256906850275, 245672965703 ],\n \"samples_ts\": [ 1.99179, 1.99294, 2.08407 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T11:30:46Z", - "avg_ns": 100109602288, - "stddev_ns": 513119417, - "avg_ts": 5.330219, - "stddev_ts": 1.365568, - "samples_ns": [ - 122409789617, - 103071155996, - 74847861252 - ], - "samples_ts": [ - 4.18267, - 4.96744, - 6.84054 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T11:37:01Z", - "avg_ns": 253211709705, - "stddev_ns": 2395102974, - "avg_ts": 2.022934, - "stddev_ts": 0.05295, - "samples_ns": [ - 257055313138, - 256906850275, - 245672965703 - ], - "samples_ts": [ - 1.99179, - 1.99294, - 2.08407 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 451 - }, - { - "timestamp_utc": "2025-12-09T11:54:25.486639+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:49:48Z\",\n \"avg_ns\": 18495890427,\n \"stddev_ns\": 19234620,\n \"avg_ts\": 6.920461,\n \"stddev_ts\": 0.007197,\n \"samples_ns\": [ 18476971284, 18515425720, 18495274277 ],\n \"samples_ts\": [ 6.92754, 6.91315, 6.92069 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:51:10Z\",\n \"avg_ns\": 64636021630,\n \"stddev_ns\": 2003146459,\n \"avg_ts\": 2.099723,\n \"stddev_ts\": 0.639106,\n \"samples_ns\": [ 45650106369, 66023101444, 82234857079 ],\n \"samples_ts\": [ 2.80394, 1.93872, 1.55652 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T11:49:48Z", - "avg_ns": 18495890427, - "stddev_ns": 19234620, - "avg_ts": 6.920461, - "stddev_ts": 0.007197, - "samples_ns": [ - 18476971284, - 18515425720, - 18495274277 - ], - "samples_ts": [ - 6.92754, - 6.91315, - 6.92069 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T11:51:10Z", - "avg_ns": 64636021630, - "stddev_ns": 2003146459, - "avg_ts": 2.099723, - "stddev_ts": 0.639106, - "samples_ns": [ - 45650106369, - 66023101444, - 82234857079 - ], - "samples_ts": [ - 2.80394, - 1.93872, - 1.55652 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 452 - }, - { - "timestamp_utc": "2025-12-09T12:08:15.934199+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:54:31Z\",\n \"avg_ns\": 19195734565,\n \"stddev_ns\": 690512380,\n \"avg_ts\": 6.673788,\n \"stddev_ts\": 0.235187,\n \"samples_ns\": [ 19993057553, 18800863602, 18793282541 ],\n \"samples_ts\": [ 6.40222, 6.8082, 6.81094 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:56:02Z\",\n \"avg_ns\": 244137035229,\n \"stddev_ns\": 854028484,\n \"avg_ts\": 2.103250,\n \"stddev_ts\": 0.138152,\n \"samples_ns\": [ 260674305374, 243208209677, 228528590636 ],\n \"samples_ts\": [ 1.96414, 2.10519, 2.24042 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T11:54:31Z", - "avg_ns": 19195734565, - "stddev_ns": 690512380, - "avg_ts": 6.673788, - "stddev_ts": 0.235187, - "samples_ns": [ - 19993057553, - 18800863602, - 18793282541 - ], - "samples_ts": [ - 6.40222, - 6.8082, - 6.81094 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T11:56:02Z", - "avg_ns": 244137035229, - "stddev_ns": 854028484, - "avg_ts": 2.10325, - "stddev_ts": 0.138152, - "samples_ns": [ - 260674305374, - 243208209677, - 228528590636 - ], - "samples_ts": [ - 1.96414, - 2.10519, - 2.24042 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 453 - }, - { - "timestamp_utc": "2025-12-09T12:18:12.117005+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:08:21Z\",\n \"avg_ns\": 101659388909,\n \"stddev_ns\": 3743902509,\n \"avg_ts\": 5.225901,\n \"stddev_ts\": 1.291809,\n \"samples_ns\": [ 76414456840, 109294312705, 119269397182 ],\n \"samples_ts\": [ 6.7003, 4.6846, 4.2928 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:15:30Z\",\n \"avg_ns\": 53694828487,\n \"stddev_ns\": 555271962,\n \"avg_ts\": 2.480625,\n \"stddev_ts\": 0.559658,\n \"samples_ns\": [ 45688508793, 45618117748, 69777858921 ],\n \"samples_ts\": [ 2.80158, 2.8059, 1.83439 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T12:08:21Z", - "avg_ns": 101659388909, - "stddev_ns": 3743902509, - "avg_ts": 5.225901, - "stddev_ts": 1.291809, - "samples_ns": [ - 76414456840, - 109294312705, - 119269397182 - ], - "samples_ts": [ - 6.7003, - 4.6846, - 4.2928 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T12:15:30Z", - "avg_ns": 53694828487, - "stddev_ns": 555271962, - "avg_ts": 2.480625, - "stddev_ts": 0.559658, - "samples_ns": [ - 45688508793, - 45618117748, - 69777858921 - ], - "samples_ts": [ - 2.80158, - 2.8059, - 1.83439 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 454 - }, - { - "timestamp_utc": "2025-12-09T12:37:11.677081+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:18:17Z\",\n \"avg_ns\": 100449908193,\n \"stddev_ns\": 480466923,\n \"avg_ts\": 5.312427,\n \"stddev_ts\": 1.357104,\n \"samples_ns\": [ 75236818022, 102832542594, 123280363964 ],\n \"samples_ts\": [ 6.80518, 4.97897, 4.15314 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:25:23Z\",\n \"avg_ns\": 235981842065,\n \"stddev_ns\": 3800454311,\n \"avg_ts\": 2.170746,\n \"stddev_ts\": 0.059347,\n \"samples_ns\": [ 242913108720, 230050093215, 234982324261 ],\n \"samples_ts\": [ 2.10775, 2.2256, 2.17889 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T12:18:17Z", - "avg_ns": 100449908193, - "stddev_ns": 480466923, - "avg_ts": 5.312427, - "stddev_ts": 1.357104, - "samples_ns": [ - 75236818022, - 102832542594, - 123280363964 - ], - "samples_ts": [ - 6.80518, - 4.97897, - 4.15314 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T12:25:23Z", - "avg_ns": 235981842065, - "stddev_ns": 3800454311, - "avg_ts": 2.170746, - "stddev_ts": 0.059347, - "samples_ns": [ - 242913108720, - 230050093215, - 234982324261 - ], - "samples_ts": [ - 2.10775, - 2.2256, - 2.17889 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 455 - }, - { - "timestamp_utc": "2025-12-09T12:41:59.637720+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:37:14Z\",\n \"avg_ns\": 33448465933,\n \"stddev_ns\": 25766526,\n \"avg_ts\": 3.826784,\n \"stddev_ts\": 0.002949,\n \"samples_ns\": [ 33460820258, 33465728234, 33418849308 ],\n \"samples_ts\": [ 3.82537, 3.82481, 3.83017 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:39:26Z\",\n \"avg_ns\": 50673016741,\n \"stddev_ns\": 3759823784,\n \"avg_ts\": 2.590006,\n \"stddev_ts\": 0.471722,\n \"samples_ns\": [ 62582033783, 44680036398, 44756980042 ],\n \"samples_ts\": [ 2.04532, 2.86481, 2.85989 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T12:37:14Z", - "avg_ns": 33448465933, - "stddev_ns": 25766526, - "avg_ts": 3.826784, - "stddev_ts": 0.002949, - "samples_ns": [ - 33460820258, - 33465728234, - 33418849308 - ], - "samples_ts": [ - 3.82537, - 3.82481, - 3.83017 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T12:39:26Z", - "avg_ns": 50673016741, - "stddev_ns": 3759823784, - "avg_ts": 2.590006, - "stddev_ts": 0.471722, - "samples_ns": [ - 62582033783, - 44680036398, - 44756980042 - ], - "samples_ts": [ - 2.04532, - 2.86481, - 2.85989 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 456 - }, - { - "timestamp_utc": "2025-12-09T12:56:08.655598+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:42:02Z\",\n \"avg_ns\": 33124215049,\n \"stddev_ns\": 55205376,\n \"avg_ts\": 3.864250,\n \"stddev_ts\": 0.006446,\n \"samples_ns\": [ 33060650362, 33160150509, 33151844277 ],\n \"samples_ts\": [ 3.87167, 3.86005, 3.86102 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:44:04Z\",\n \"avg_ns\": 241217153607,\n \"stddev_ns\": 3776351345,\n \"avg_ts\": 2.130897,\n \"stddev_ts\": 0.164267,\n \"samples_ns\": [ 222103963366, 242830618402, 258716879055 ],\n \"samples_ts\": [ 2.30523, 2.10847, 1.979 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T12:42:02Z", - "avg_ns": 33124215049, - "stddev_ns": 55205376, - "avg_ts": 3.86425, - "stddev_ts": 0.006446, - "samples_ns": [ - 33060650362, - 33160150509, - 33151844277 - ], - "samples_ts": [ - 3.87167, - 3.86005, - 3.86102 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T12:44:04Z", - "avg_ns": 241217153607, - "stddev_ns": 3776351345, - "avg_ts": 2.130897, - "stddev_ts": 0.164267, - "samples_ns": [ - 222103963366, - 242830618402, - 258716879055 - ], - "samples_ts": [ - 2.30523, - 2.10847, - 1.979 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 457 - }, - { - "timestamp_utc": "2025-12-09T13:06:04.666913+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:56:11Z\",\n \"avg_ns\": 101686318130,\n \"stddev_ns\": 3945609575,\n \"avg_ts\": 5.293494,\n \"stddev_ts\": 1.327393,\n \"samples_ns\": [ 135941724397, 82980590179, 86136639816 ],\n \"samples_ts\": [ 3.76632, 6.17012, 5.94404 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:02:40Z\",\n \"avg_ns\": 67666997682,\n \"stddev_ns\": 1577586773,\n \"avg_ts\": 2.014941,\n \"stddev_ts\": 0.660159,\n \"samples_ns\": [ 82448709783, 74358969117, 46193314146 ],\n \"samples_ts\": [ 1.55248, 1.72138, 2.77096 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T12:56:11Z", - "avg_ns": 101686318130, - "stddev_ns": 3945609575, - "avg_ts": 5.293494, - "stddev_ts": 1.327393, - "samples_ns": [ - 135941724397, - 82980590179, - 86136639816 - ], - "samples_ts": [ - 3.76632, - 6.17012, - 5.94404 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T13:02:40Z", - "avg_ns": 67666997682, - "stddev_ns": 1577586773, - "avg_ts": 2.014941, - "stddev_ts": 0.660159, - "samples_ns": [ - 82448709783, - 74358969117, - 46193314146 - ], - "samples_ts": [ - 1.55248, - 1.72138, - 2.77096 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 458 - }, - { - "timestamp_utc": "2025-12-09T13:25:25.791758+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:06:07Z\",\n \"avg_ns\": 100267480069,\n \"stddev_ns\": 1807457923,\n \"avg_ts\": 5.380212,\n \"stddev_ts\": 1.384971,\n \"samples_ns\": [ 134466420218, 86564329932, 79771690058 ],\n \"samples_ts\": [ 3.80764, 5.91468, 6.41832 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:12:28Z\",\n \"avg_ns\": 258885550955,\n \"stddev_ns\": 822558586,\n \"avg_ts\": 1.977904,\n \"stddev_ts\": 0.024206,\n \"samples_ns\": [ 255258868925, 260510855390, 260886928550 ],\n \"samples_ts\": [ 2.00581, 1.96537, 1.96254 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T13:06:07Z", - "avg_ns": 100267480069, - "stddev_ns": 1807457923, - "avg_ts": 5.380212, - "stddev_ts": 1.384971, - "samples_ns": [ - 134466420218, - 86564329932, - 79771690058 - ], - "samples_ts": [ - 3.80764, - 5.91468, - 6.41832 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T13:12:28Z", - "avg_ns": 258885550955, - "stddev_ns": 822558586, - "avg_ts": 1.977904, - "stddev_ts": 0.024206, - "samples_ns": [ - 255258868925, - 260510855390, - 260886928550 - ], - "samples_ts": [ - 2.00581, - 1.96537, - 1.96254 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 459 - }, - { - "timestamp_utc": "2025-12-09T13:30:14.082092+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:25:28Z\",\n \"avg_ns\": 18574672421,\n \"stddev_ns\": 6714724,\n \"avg_ts\": 6.891105,\n \"stddev_ts\": 0.002491,\n \"samples_ns\": [ 18572365984, 18569414884, 18582236395 ],\n \"samples_ts\": [ 6.89196, 6.89306, 6.8883 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:26:43Z\",\n \"avg_ns\": 70050316225,\n \"stddev_ns\": 746453708,\n \"avg_ts\": 1.885572,\n \"stddev_ts\": 0.430114,\n \"samples_ns\": [ 53805492790, 80528712158, 75816743727 ],\n \"samples_ts\": [ 2.37894, 1.5895, 1.68828 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T13:25:28Z", - "avg_ns": 18574672421, - "stddev_ns": 6714724, - "avg_ts": 6.891105, - "stddev_ts": 0.002491, - "samples_ns": [ - 18572365984, - 18569414884, - 18582236395 - ], - "samples_ts": [ - 6.89196, - 6.89306, - 6.8883 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T13:26:43Z", - "avg_ns": 70050316225, - "stddev_ns": 746453708, - "avg_ts": 1.885572, - "stddev_ts": 0.430114, - "samples_ns": [ - 53805492790, - 80528712158, - 75816743727 - ], - "samples_ts": [ - 2.37894, - 1.5895, - 1.68828 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 460 - }, - { - "timestamp_utc": "2025-12-09T13:43:44.561969+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:30:17Z\",\n \"avg_ns\": 18533730280,\n \"stddev_ns\": 22880152,\n \"avg_ts\": 6.906334,\n \"stddev_ts\": 0.008523,\n \"samples_ns\": [ 18558311416, 18513054687, 18529824738 ],\n \"samples_ts\": [ 6.89718, 6.91404, 6.90778 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:31:31Z\",\n \"avg_ns\": 244105890686,\n \"stddev_ns\": 551403125,\n \"avg_ts\": 2.106791,\n \"stddev_ts\": 0.175820,\n \"samples_ns\": [ 256738227431, 253876373611, 221703071017 ],\n \"samples_ts\": [ 1.99425, 2.01673, 2.3094 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T13:30:17Z", - "avg_ns": 18533730280, - "stddev_ns": 22880152, - "avg_ts": 6.906334, - "stddev_ts": 0.008523, - "samples_ns": [ - 18558311416, - 18513054687, - 18529824738 - ], - "samples_ts": [ - 6.89718, - 6.91404, - 6.90778 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T13:31:31Z", - "avg_ns": 244105890686, - "stddev_ns": 551403125, - "avg_ts": 2.106791, - "stddev_ts": 0.17582, - "samples_ns": [ - 256738227431, - 253876373611, - 221703071017 - ], - "samples_ts": [ - 1.99425, - 2.01673, - 2.3094 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 461 - }, - { - "timestamp_utc": "2025-12-09T13:53:24.289447+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:43:50Z\",\n \"avg_ns\": 99818225981,\n \"stddev_ns\": 4000251715,\n \"avg_ts\": 5.326091,\n \"stddev_ts\": 1.337275,\n \"samples_ns\": [ 74595814963, 115473519941, 109385343039 ],\n \"samples_ts\": [ 6.86366, 4.43392, 4.6807 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:50:38Z\",\n \"avg_ns\": 54851129087,\n \"stddev_ns\": 4026724410,\n \"avg_ts\": 2.469401,\n \"stddev_ts\": 0.652893,\n \"samples_ns\": [ 44954774269, 44985046761, 74613566232 ],\n \"samples_ts\": [ 2.84731, 2.84539, 1.71551 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T13:43:50Z", - "avg_ns": 99818225981, - "stddev_ns": 4000251715, - "avg_ts": 5.326091, - "stddev_ts": 1.337275, - "samples_ns": [ - 74595814963, - 115473519941, - 109385343039 - ], - "samples_ts": [ - 6.86366, - 4.43392, - 4.6807 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T13:50:38Z", - "avg_ns": 54851129087, - "stddev_ns": 4026724410, - "avg_ts": 2.469401, - "stddev_ts": 0.652893, - "samples_ns": [ - 44954774269, - 44985046761, - 74613566232 - ], - "samples_ts": [ - 2.84731, - 2.84539, - 1.71551 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 462 - }, - { - "timestamp_utc": "2025-12-09T14:12:03.959107+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:53:30Z\",\n \"avg_ns\": 99841460834,\n \"stddev_ns\": 1192512846,\n \"avg_ts\": 5.329801,\n \"stddev_ts\": 1.343744,\n \"samples_ns\": [ 74659778169, 106677331072, 118187273263 ],\n \"samples_ts\": [ 6.85778, 4.79952, 4.33211 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:00:27Z\",\n \"avg_ns\": 231829828595,\n \"stddev_ns\": 3782929219,\n \"avg_ts\": 2.210445,\n \"stddev_ts\": 0.080364,\n \"samples_ns\": [ 239331930969, 222840492363, 233317062455 ],\n \"samples_ts\": [ 2.13929, 2.29761, 2.19444 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T13:53:30Z", - "avg_ns": 99841460834, - "stddev_ns": 1192512846, - "avg_ts": 5.329801, - "stddev_ts": 1.343744, - "samples_ns": [ - 74659778169, - 106677331072, - 118187273263 - ], - "samples_ts": [ - 6.85778, - 4.79952, - 4.33211 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T14:00:27Z", - "avg_ns": 231829828595, - "stddev_ns": 3782929219, - "avg_ts": 2.210445, - "stddev_ts": 0.080364, - "samples_ns": [ - 239331930969, - 222840492363, - 233317062455 - ], - "samples_ts": [ - 2.13929, - 2.29761, - 2.19444 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 463 - }, - { - "timestamp_utc": "2025-12-09T14:16:51.515703+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:12:07Z\",\n \"avg_ns\": 33330033917,\n \"stddev_ns\": 7135070,\n \"avg_ts\": 3.840380,\n \"stddev_ts\": 0.000822,\n \"samples_ns\": [ 33322162685, 33336069218, 33331869849 ],\n \"samples_ts\": [ 3.84129, 3.83968, 3.84017 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:14:12Z\",\n \"avg_ns\": 52756311089,\n \"stddev_ns\": 3764458763,\n \"avg_ts\": 2.530567,\n \"stddev_ts\": 0.585718,\n \"samples_ns\": [ 69030378492, 44687380958, 44551173818 ],\n \"samples_ts\": [ 1.85426, 2.86434, 2.8731 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T14:12:07Z", - "avg_ns": 33330033917, - "stddev_ns": 7135070, - "avg_ts": 3.84038, - "stddev_ts": 0.000822, - "samples_ns": [ - 33322162685, - 33336069218, - 33331869849 - ], - "samples_ts": [ - 3.84129, - 3.83968, - 3.84017 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T14:14:12Z", - "avg_ns": 52756311089, - "stddev_ns": 3764458763, - "avg_ts": 2.530567, - "stddev_ts": 0.585718, - "samples_ns": [ - 69030378492, - 44687380958, - 44551173818 - ], - "samples_ts": [ - 1.85426, - 2.86434, - 2.8731 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 464 - }, - { - "timestamp_utc": "2025-12-09T14:31:00.907309+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:16:54Z\",\n \"avg_ns\": 32275130436,\n \"stddev_ns\": 1876955302,\n \"avg_ts\": 3.975176,\n \"stddev_ts\": 0.239203,\n \"samples_ns\": [ 30107943389, 33379613394, 33337834527 ],\n \"samples_ts\": [ 4.25137, 3.83468, 3.83948 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:18:50Z\",\n \"avg_ns\": 243232927730,\n \"stddev_ns\": 3760611063,\n \"avg_ts\": 2.111182,\n \"stddev_ts\": 0.142097,\n \"samples_ns\": [ 225534719306, 247719700890, 256444362995 ],\n \"samples_ts\": [ 2.27016, 2.06685, 1.99653 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T14:16:54Z", - "avg_ns": 32275130436, - "stddev_ns": 1876955302, - "avg_ts": 3.975176, - "stddev_ts": 0.239203, - "samples_ns": [ - 30107943389, - 33379613394, - 33337834527 - ], - "samples_ts": [ - 4.25137, - 3.83468, - 3.83948 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T14:18:50Z", - "avg_ns": 243232927730, - "stddev_ns": 3760611063, - "avg_ts": 2.111182, - "stddev_ts": 0.142097, - "samples_ns": [ - 225534719306, - 247719700890, - 256444362995 - ], - "samples_ts": [ - 2.27016, - 2.06685, - 1.99653 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 465 - }, - { - "timestamp_utc": "2025-12-09T14:40:51.596526+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:31:04Z\",\n \"avg_ns\": 101348968951,\n \"stddev_ns\": 4013048396,\n \"avg_ts\": 5.327383,\n \"stddev_ts\": 1.394469,\n \"samples_ns\": [ 135714929835, 89166707897, 79165269121 ],\n \"samples_ts\": [ 3.77261, 5.74205, 6.46748 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:37:25Z\",\n \"avg_ns\": 68362780363,\n \"stddev_ns\": 681306632,\n \"avg_ts\": 2.009195,\n \"stddev_ts\": 0.704145,\n \"samples_ns\": [ 79844922876, 79889872842, 45353545371 ],\n \"samples_ts\": [ 1.60311, 1.60221, 2.82227 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T14:31:04Z", - "avg_ns": 101348968951, - "stddev_ns": 4013048396, - "avg_ts": 5.327383, - "stddev_ts": 1.394469, - "samples_ns": [ - 135714929835, - 89166707897, - 79165269121 - ], - "samples_ts": [ - 3.77261, - 5.74205, - 6.46748 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T14:37:25Z", - "avg_ns": 68362780363, - "stddev_ns": 681306632, - "avg_ts": 2.009195, - "stddev_ts": 0.704145, - "samples_ns": [ - 79844922876, - 79889872842, - 45353545371 - ], - "samples_ts": [ - 1.60311, - 1.60221, - 2.82227 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 466 - }, - { - "timestamp_utc": "2025-12-09T14:59:49.818810+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:40:54Z\",\n \"avg_ns\": 100551648840,\n \"stddev_ns\": 1945995649,\n \"avg_ts\": 5.341564,\n \"stddev_ts\": 1.407669,\n \"samples_ns\": [ 129315245699, 96749307475, 75590393346 ],\n \"samples_ts\": [ 3.95932, 5.29203, 6.77335 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:47:12Z\",\n \"avg_ns\": 252274565862,\n \"stddev_ns\": 1894849411,\n \"avg_ts\": 2.030008,\n \"stddev_ts\": 0.038176,\n \"samples_ns\": [ 254653190051, 255303524544, 246866982992 ],\n \"samples_ts\": [ 2.01058, 2.00546, 2.07399 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T14:40:54Z", - "avg_ns": 100551648840, - "stddev_ns": 1945995649, - "avg_ts": 5.341564, - "stddev_ts": 1.407669, - "samples_ns": [ - 129315245699, - 96749307475, - 75590393346 - ], - "samples_ts": [ - 3.95932, - 5.29203, - 6.77335 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T14:47:12Z", - "avg_ns": 252274565862, - "stddev_ns": 1894849411, - "avg_ts": 2.030008, - "stddev_ts": 0.038176, - "samples_ns": [ - 254653190051, - 255303524544, - 246866982992 - ], - "samples_ts": [ - 2.01058, - 2.00546, - 2.07399 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 467 - }, - { - "timestamp_utc": "2025-12-09T15:04:02.735306+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:59:56Z\",\n \"avg_ns\": 9410550768,\n \"stddev_ns\": 11853426,\n \"avg_ts\": 13.601769,\n \"stddev_ts\": 0.017142,\n \"samples_ns\": [ 9415267314, 9419319065, 9397065927 ],\n \"samples_ts\": [ 13.5949, 13.5891, 13.6213 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:00:42Z\",\n \"avg_ns\": 66511966679,\n \"stddev_ns\": 2097037513,\n \"avg_ts\": 2.705606,\n \"stddev_ts\": 2.131446,\n \"samples_ns\": [ 24797576523, 79344894300, 95393429214 ],\n \"samples_ts\": [ 5.16179, 1.61321, 1.34181 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T14:59:56Z", - "avg_ns": 9410550768, - "stddev_ns": 11853426, - "avg_ts": 13.601769, - "stddev_ts": 0.017142, - "samples_ns": [ - 9415267314, - 9419319065, - 9397065927 - ], - "samples_ts": [ - 13.5949, - 13.5891, - 13.6213 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T15:00:42Z", - "avg_ns": 66511966679, - "stddev_ns": 2097037513, - "avg_ts": 2.705606, - "stddev_ts": 2.131446, - "samples_ns": [ - 24797576523, - 79344894300, - 95393429214 - ], - "samples_ts": [ - 5.16179, - 1.61321, - 1.34181 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 468 - }, - { - "timestamp_utc": "2025-12-09T15:16:16.283411+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:04:08Z\",\n \"avg_ns\": 15738202616,\n \"stddev_ns\": 1163526527,\n \"avg_ts\": 10.596323,\n \"stddev_ts\": 5.280802,\n \"samples_ns\": [ 28453422986, 9383536376, 9377648486 ],\n \"samples_ts\": [ 4.49858, 13.6409, 13.6495 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:05:28Z\",\n \"avg_ns\": 215698381282,\n \"stddev_ns\": 2041748721,\n \"avg_ts\": 2.409182,\n \"stddev_ts\": 0.343158,\n \"samples_ns\": [ 254337556463, 195702509507, 197055077876 ],\n \"samples_ts\": [ 2.01307, 2.61622, 2.59826 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T15:04:08Z", - "avg_ns": 15738202616, - "stddev_ns": 1163526527, - "avg_ts": 10.596323, - "stddev_ts": 5.280802, - "samples_ns": [ - 28453422986, - 9383536376, - 9377648486 - ], - "samples_ts": [ - 4.49858, - 13.6409, - 13.6495 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T15:05:28Z", - "avg_ns": 215698381282, - "stddev_ns": 2041748721, - "avg_ts": 2.409182, - "stddev_ts": 0.343158, - "samples_ns": [ - 254337556463, - 195702509507, - 197055077876 - ], - "samples_ts": [ - 2.01307, - 2.61622, - 2.59826 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 469 - }, - { - "timestamp_utc": "2025-12-09T15:25:42.476178+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:16:20Z\",\n \"avg_ns\": 78401584239,\n \"stddev_ns\": 3907260955,\n \"avg_ts\": 7.978288,\n \"stddev_ts\": 4.815141,\n \"samples_ns\": [ 103847771543, 37843430380, 93513550796 ],\n \"samples_ts\": [ 4.93029, 13.5294, 5.47514 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:22:31Z\",\n \"avg_ns\": 63476168807,\n \"stddev_ns\": 889539140,\n \"avg_ts\": 2.775195,\n \"stddev_ts\": 2.071506,\n \"samples_ns\": [ 96311784290, 69254160307, 24862561824 ],\n \"samples_ts\": [ 1.32902, 1.84826, 5.1483 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T15:16:20Z", - "avg_ns": 78401584239, - "stddev_ns": 3907260955, - "avg_ts": 7.978288, - "stddev_ts": 4.815141, - "samples_ns": [ - 103847771543, - 37843430380, - 93513550796 - ], - "samples_ts": [ - 4.93029, - 13.5294, - 5.47514 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T15:22:31Z", - "avg_ns": 63476168807, - "stddev_ns": 889539140, - "avg_ts": 2.775195, - "stddev_ts": 2.071506, - "samples_ns": [ - 96311784290, - 69254160307, - 24862561824 - ], - "samples_ts": [ - 1.32902, - 1.84826, - 5.1483 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 470 - }, - { - "timestamp_utc": "2025-12-09T15:43:15.300153+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:25:45Z\",\n \"avg_ns\": 88281320443,\n \"stddev_ns\": 2051304722,\n \"avg_ts\": 7.621436,\n \"stddev_ts\": 5.219665,\n \"samples_ns\": [ 132885578127, 94253725434, 37704657770 ],\n \"samples_ts\": [ 3.85294, 5.43215, 13.5792 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:31:01Z\",\n \"avg_ns\": 244468508551,\n \"stddev_ns\": 3909511834,\n \"avg_ts\": 2.146726,\n \"stddev_ts\": 0.433812,\n \"samples_ns\": [ 272456371636, 267541077012, 193408077005 ],\n \"samples_ts\": [ 1.8792, 1.91372, 2.64725 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T15:25:45Z", - "avg_ns": 88281320443, - "stddev_ns": 2051304722, - "avg_ts": 7.621436, - "stddev_ts": 5.219665, - "samples_ns": [ - 132885578127, - 94253725434, - 37704657770 - ], - "samples_ts": [ - 3.85294, - 5.43215, - 13.5792 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T15:31:01Z", - "avg_ns": 244468508551, - "stddev_ns": 3909511834, - "avg_ts": 2.146726, - "stddev_ts": 0.433812, - "samples_ns": [ - 272456371636, - 267541077012, - 193408077005 - ], - "samples_ts": [ - 1.8792, - 1.91372, - 2.64725 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 471 - }, - { - "timestamp_utc": "2025-12-09T15:47:16.237075+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:43:20Z\",\n \"avg_ns\": 27041178365,\n \"stddev_ns\": 3579536559,\n \"avg_ts\": 5.394948,\n \"stddev_ts\": 2.617623,\n \"samples_ns\": [ 32958765960, 32958387053, 15206382083 ],\n \"samples_ts\": [ 3.88364, 3.88369, 8.41752 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:45:15Z\",\n \"avg_ns\": 40122657447,\n \"stddev_ns\": 3716218494,\n \"avg_ts\": 4.057615,\n \"stddev_ts\": 1.952594,\n \"samples_ns\": [ 24655604235, 24718244358, 70994123749 ],\n \"samples_ts\": [ 5.19152, 5.17836, 1.80297 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T15:43:20Z", - "avg_ns": 27041178365, - "stddev_ns": 3579536559, - "avg_ts": 5.394948, - "stddev_ts": 2.617623, - "samples_ns": [ - 32958765960, - 32958387053, - 15206382083 - ], - "samples_ts": [ - 3.88364, - 3.88369, - 8.41752 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T15:45:15Z", - "avg_ns": 40122657447, - "stddev_ns": 3716218494, - "avg_ts": 4.057615, - "stddev_ts": 1.952594, - "samples_ns": [ - 24655604235, - 24718244358, - 70994123749 - ], - "samples_ts": [ - 5.19152, - 5.17836, - 1.80297 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 472 - }, - { - "timestamp_utc": "2025-12-09T16:00:43.302757+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:47:22Z\",\n \"avg_ns\": 33142510702,\n \"stddev_ns\": 21078692,\n \"avg_ts\": 3.862110,\n \"stddev_ts\": 0.002457,\n \"samples_ns\": [ 33120285171, 33145030994, 33162215941 ],\n \"samples_ts\": [ 3.8647, 3.86182, 3.85981 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:49:34Z\",\n \"avg_ns\": 222536373185,\n \"stddev_ns\": 3910148045,\n \"avg_ts\": 2.352202,\n \"stddev_ts\": 0.405574,\n \"samples_ns\": [ 194330490985, 202016760692, 271261867880 ],\n \"samples_ts\": [ 2.63469, 2.53444, 1.88748 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T15:47:22Z", - "avg_ns": 33142510702, - "stddev_ns": 21078692, - "avg_ts": 3.86211, - "stddev_ts": 0.002457, - "samples_ns": [ - 33120285171, - 33145030994, - 33162215941 - ], - "samples_ts": [ - 3.8647, - 3.86182, - 3.85981 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T15:49:34Z", - "avg_ns": 222536373185, - "stddev_ns": 3910148045, - "avg_ts": 2.352202, - "stddev_ts": 0.405574, - "samples_ns": [ - 194330490985, - 202016760692, - 271261867880 - ], - "samples_ts": [ - 2.63469, - 2.53444, - 1.88748 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 473 - }, - { - "timestamp_utc": "2025-12-09T16:10:08.629948+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:00:46Z\",\n \"avg_ns\": 87460604016,\n \"stddev_ns\": 1658179427,\n \"avg_ts\": 7.649309,\n \"stddev_ts\": 5.167497,\n \"samples_ns\": [ 131936128369, 92631807380, 37813876301 ],\n \"samples_ts\": [ 3.88067, 5.52726, 13.54 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:06:01Z\",\n \"avg_ns\": 82158828453,\n \"stddev_ns\": 4098459199,\n \"avg_ts\": 1.620374,\n \"stddev_ts\": 0.417413,\n \"samples_ns\": [ 60885753196, 93250987563, 92339744600 ],\n \"samples_ts\": [ 2.1023, 1.37264, 1.38619 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T16:00:46Z", - "avg_ns": 87460604016, - "stddev_ns": 1658179427, - "avg_ts": 7.649309, - "stddev_ts": 5.167497, - "samples_ns": [ - 131936128369, - 92631807380, - 37813876301 - ], - "samples_ts": [ - 3.88067, - 5.52726, - 13.54 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T16:06:01Z", - "avg_ns": 82158828453, - "stddev_ns": 4098459199, - "avg_ts": 1.620374, - "stddev_ts": 0.417413, - "samples_ns": [ - 60885753196, - 93250987563, - 92339744600 - ], - "samples_ts": [ - 2.1023, - 1.37264, - 1.38619 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 474 - }, - { - "timestamp_utc": "2025-12-09T16:26:12.402739+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:10:11Z\",\n \"avg_ns\": 92335099288,\n \"stddev_ns\": 4055883808,\n \"avg_ts\": 6.143659,\n \"stddev_ts\": 2.342153,\n \"samples_ns\": [ 59747097245, 131442741902, 85815458718 ],\n \"samples_ts\": [ 8.56945, 3.89523, 5.96629 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:15:26Z\",\n \"avg_ns\": 215133750552,\n \"stddev_ns\": 3903209960,\n \"avg_ts\": 2.407494,\n \"stddev_ts\": 0.304535,\n \"samples_ns\": [ 248727548477, 195481250273, 201192452907 ],\n \"samples_ts\": [ 2.05848, 2.61918, 2.54483 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T16:10:11Z", - "avg_ns": 92335099288, - "stddev_ns": 4055883808, - "avg_ts": 6.143659, - "stddev_ts": 2.342153, - "samples_ns": [ - 59747097245, - 131442741902, - 85815458718 - ], - "samples_ts": [ - 8.56945, - 3.89523, - 5.96629 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T16:15:26Z", - "avg_ns": 215133750552, - "stddev_ns": 3903209960, - "avg_ts": 2.407494, - "stddev_ts": 0.304535, - "samples_ns": [ - 248727548477, - 195481250273, - 201192452907 - ], - "samples_ts": [ - 2.05848, - 2.61918, - 2.54483 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 475 - }, - { - "timestamp_utc": "2025-12-09T16:30:56.085637+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:26:15Z\",\n \"avg_ns\": 33138966800,\n \"stddev_ns\": 203350233,\n \"avg_ts\": 3.862619,\n \"stddev_ts\": 0.023623,\n \"samples_ns\": [ 33000917528, 33372484950, 33043497922 ],\n \"samples_ts\": [ 3.87868, 3.8355, 3.87368 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:28:25Z\",\n \"avg_ns\": 49734191852,\n \"stddev_ns\": 4155655612,\n \"avg_ts\": 3.607303,\n \"stddev_ts\": 2.001705,\n \"samples_ns\": [ 94668227980, 29792725904, 24741621673 ],\n \"samples_ts\": [ 1.35209, 4.29635, 5.17347 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T16:26:15Z", - "avg_ns": 33138966800, - "stddev_ns": 203350233, - "avg_ts": 3.862619, - "stddev_ts": 0.023623, - "samples_ns": [ - 33000917528, - 33372484950, - 33043497922 - ], - "samples_ts": [ - 3.87868, - 3.8355, - 3.87368 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T16:28:25Z", - "avg_ns": 49734191852, - "stddev_ns": 4155655612, - "avg_ts": 3.607303, - "stddev_ts": 2.001705, - "samples_ns": [ - 94668227980, - 29792725904, - 24741621673 - ], - "samples_ts": [ - 1.35209, - 4.29635, - 5.17347 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 476 - }, - { - "timestamp_utc": "2025-12-09T16:45:10.074429+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:30:59Z\",\n \"avg_ns\": 25267493686,\n \"stddev_ns\": 899407290,\n \"avg_ts\": 6.307915,\n \"stddev_ts\": 3.999352,\n \"samples_ns\": [ 11718791281, 30823644830, 33260044947 ],\n \"samples_ts\": [ 10.9226, 4.15266, 3.84846 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:32:24Z\",\n \"avg_ns\": 254813744291,\n \"stddev_ns\": 1825539787,\n \"avg_ts\": 2.030683,\n \"stddev_ts\": 0.264569,\n \"samples_ns\": [ 219161279220, 272512634449, 272767319204 ],\n \"samples_ts\": [ 2.33618, 1.87881, 1.87706 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T16:30:59Z", - "avg_ns": 25267493686, - "stddev_ns": 899407290, - "avg_ts": 6.307915, - "stddev_ts": 3.999352, - "samples_ns": [ - 11718791281, - 30823644830, - 33260044947 - ], - "samples_ts": [ - 10.9226, - 4.15266, - 3.84846 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T16:32:24Z", - "avg_ns": 254813744291, - "stddev_ns": 1825539787, - "avg_ts": 2.030683, - "stddev_ts": 0.264569, - "samples_ns": [ - 219161279220, - 272512634449, - 272767319204 - ], - "samples_ts": [ - 2.33618, - 1.87881, - 1.87706 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 477 - }, - { - "timestamp_utc": "2025-12-09T16:53:03.762074+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:45:13Z\",\n \"avg_ns\": 92535179674,\n \"stddev_ns\": 1995731193,\n \"avg_ts\": 6.037745,\n \"stddev_ts\": 2.024586,\n \"samples_ns\": [ 64856596855, 131989182945, 80759759222 ],\n \"samples_ts\": [ 7.89434, 3.87911, 6.33979 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:50:28Z\",\n \"avg_ns\": 51384424064,\n \"stddev_ns\": 4238519642,\n \"avg_ts\": 3.375039,\n \"stddev_ts\": 1.945989,\n \"samples_ns\": [ 24329581566, 36677872071, 93145818556 ],\n \"samples_ts\": [ 5.26109, 3.48984, 1.37419 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T16:45:13Z", - "avg_ns": 92535179674, - "stddev_ns": 1995731193, - "avg_ts": 6.037745, - "stddev_ts": 2.024586, - "samples_ns": [ - 64856596855, - 131989182945, - 80759759222 - ], - "samples_ts": [ - 7.89434, - 3.87911, - 6.33979 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T16:50:28Z", - "avg_ns": 51384424064, - "stddev_ns": 4238519642, - "avg_ts": 3.375039, - "stddev_ts": 1.945989, - "samples_ns": [ - 24329581566, - 36677872071, - 93145818556 - ], - "samples_ts": [ - 5.26109, - 3.48984, - 1.37419 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 478 - }, - { - "timestamp_utc": "2025-12-09T17:10:39.461136+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:53:09Z\",\n \"avg_ns\": 81338725792,\n \"stddev_ns\": 3446700760,\n \"avg_ts\": 8.087837,\n \"stddev_ts\": 4.903114,\n \"samples_ns\": [ 38002421712, 74103429239, 131910326425 ],\n \"samples_ts\": [ 13.4728, 6.90926, 3.88142 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:59:16Z\",\n \"avg_ns\": 227421712470,\n \"stddev_ns\": 1548118210,\n \"avg_ts\": 2.295875,\n \"stddev_ts\": 0.378947,\n \"samples_ns\": [ 195600238480, 214057338522, 272607560410 ],\n \"samples_ts\": [ 2.61758, 2.39188, 1.87816 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T16:53:09Z", - "avg_ns": 81338725792, - "stddev_ns": 3446700760, - "avg_ts": 8.087837, - "stddev_ts": 4.903114, - "samples_ns": [ - 38002421712, - 74103429239, - 131910326425 - ], - "samples_ts": [ - 13.4728, - 6.90926, - 3.88142 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T16:59:16Z", - "avg_ns": 227421712470, - "stddev_ns": 1548118210, - "avg_ts": 2.295875, - "stddev_ts": 0.378947, - "samples_ns": [ - 195600238480, - 214057338522, - 272607560410 - ], - "samples_ts": [ - 2.61758, - 2.39188, - 1.87816 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 479 - }, - { - "timestamp_utc": "2025-12-09T17:15:25.498164+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:10:42Z\",\n \"avg_ns\": 10885723431,\n \"stddev_ns\": 2588848592,\n \"avg_ts\": 12.161725,\n \"stddev_ts\": 2.543123,\n \"samples_ns\": [ 9390065185, 9392037037, 13875068073 ],\n \"samples_ts\": [ 13.6314, 13.6286, 9.22518 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:11:24Z\",\n \"avg_ns\": 79910698072,\n \"stddev_ns\": 4147118267,\n \"avg_ts\": 1.781862,\n \"stddev_ts\": 0.776096,\n \"samples_ns\": [ 96131860703, 95803668766, 47796564748 ],\n \"samples_ts\": [ 1.3315, 1.33607, 2.67802 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T17:10:42Z", - "avg_ns": 10885723431, - "stddev_ns": 2588848592, - "avg_ts": 12.161725, - "stddev_ts": 2.543123, - "samples_ns": [ - 9390065185, - 9392037037, - 13875068073 - ], - "samples_ts": [ - 13.6314, - 13.6286, - 9.22518 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T17:11:24Z", - "avg_ns": 79910698072, - "stddev_ns": 4147118267, - "avg_ts": 1.781862, - "stddev_ts": 0.776096, - "samples_ns": [ - 96131860703, - 95803668766, - 47796564748 - ], - "samples_ts": [ - 1.3315, - 1.33607, - 2.67802 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 480 - }, - { - "timestamp_utc": "2025-12-09T17:28:42.581358+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:15:28Z\",\n \"avg_ns\": 9414709658,\n \"stddev_ns\": 346029,\n \"avg_ts\": 13.595746,\n \"stddev_ts\": 0.000500,\n \"samples_ns\": [ 9414964728, 9414848470, 9414315776 ],\n \"samples_ts\": [ 13.5954, 13.5955, 13.5963 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:16:06Z\",\n \"avg_ns\": 251857743399,\n \"stddev_ns\": 1941249506,\n \"avg_ts\": 2.063244,\n \"stddev_ts\": 0.319781,\n \"samples_ns\": [ 270632222255, 274430735042, 210510272902 ],\n \"samples_ts\": [ 1.89187, 1.86568, 2.43219 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T17:15:28Z", - "avg_ns": 9414709658, - "stddev_ns": 346029, - "avg_ts": 13.595746, - "stddev_ts": 0.0005, - "samples_ns": [ - 9414964728, - 9414848470, - 9414315776 - ], - "samples_ts": [ - 13.5954, - 13.5955, - 13.5963 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T17:16:06Z", - "avg_ns": 251857743399, - "stddev_ns": 1941249506, - "avg_ts": 2.063244, - "stddev_ts": 0.319781, - "samples_ns": [ - 270632222255, - 274430735042, - 210510272902 - ], - "samples_ts": [ - 1.89187, - 1.86568, - 2.43219 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 481 - }, - { - "timestamp_utc": "2025-12-09T17:36:41.490444+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:28:48Z\",\n \"avg_ns\": 91109813600,\n \"stddev_ns\": 3346148868,\n \"avg_ts\": 7.464427,\n \"stddev_ts\": 5.297298,\n \"samples_ns\": [ 37802702007, 102234562500, 133292176295 ],\n \"samples_ts\": [ 13.544, 5.00809, 3.84119 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:34:56Z\",\n \"avg_ns\": 34506586286,\n \"stddev_ns\": 3955759926,\n \"avg_ts\": 3.938446,\n \"stddev_ts\": 1.157988,\n \"samples_ns\": [ 32943478147, 24990117408, 45586163304 ],\n \"samples_ts\": [ 3.88544, 5.12202, 2.80787 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T17:28:48Z", - "avg_ns": 91109813600, - "stddev_ns": 3346148868, - "avg_ts": 7.464427, - "stddev_ts": 5.297298, - "samples_ns": [ - 37802702007, - 102234562500, - 133292176295 - ], - "samples_ts": [ - 13.544, - 5.00809, - 3.84119 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T17:34:56Z", - "avg_ns": 34506586286, - "stddev_ns": 3955759926, - "avg_ts": 3.938446, - "stddev_ts": 1.157988, - "samples_ns": [ - 32943478147, - 24990117408, - 45586163304 - ], - "samples_ts": [ - 3.88544, - 5.12202, - 2.80787 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 482 - }, - { - "timestamp_utc": "2025-12-09T17:55:16.656127+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:36:47Z\",\n \"avg_ns\": 78416596840,\n \"stddev_ns\": 2531973332,\n \"avg_ts\": 8.028317,\n \"stddev_ts\": 4.826137,\n \"samples_ns\": [ 86404248619, 37788568915, 111056972986 ],\n \"samples_ts\": [ 5.92563, 13.5491, 4.61025 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:42:55Z\",\n \"avg_ns\": 246619156388,\n \"stddev_ns\": 900965319,\n \"avg_ts\": 2.123577,\n \"stddev_ts\": 0.410075,\n \"samples_ns\": [ 197146975241, 270555095109, 272155398815 ],\n \"samples_ts\": [ 2.59705, 1.89241, 1.88128 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T17:36:47Z", - "avg_ns": 78416596840, - "stddev_ns": 2531973332, - "avg_ts": 8.028317, - "stddev_ts": 4.826137, - "samples_ns": [ - 86404248619, - 37788568915, - 111056972986 - ], - "samples_ts": [ - 5.92563, - 13.5491, - 4.61025 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T17:42:55Z", - "avg_ns": 246619156388, - "stddev_ns": 900965319, - "avg_ts": 2.123577, - "stddev_ts": 0.410075, - "samples_ns": [ - 197146975241, - 270555095109, - 272155398815 - ], - "samples_ts": [ - 2.59705, - 1.89241, - 1.88128 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 483 - }, - { - "timestamp_utc": "2025-12-09T17:59:48.924281+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:55:19Z\",\n \"avg_ns\": 9492156893,\n \"stddev_ns\": 14063378,\n \"avg_ts\": 13.484837,\n \"stddev_ts\": 0.019995,\n \"samples_ns\": [ 9501396145, 9499102550, 9475971984 ],\n \"samples_ts\": [ 13.4717, 13.475, 13.5078 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:55:57Z\",\n \"avg_ns\": 76834633388,\n \"stddev_ns\": 585966873,\n \"avg_ts\": 2.022948,\n \"stddev_ts\": 1.206876,\n \"samples_ns\": [ 37464933534, 96428468584, 96610498048 ],\n \"samples_ts\": [ 3.41653, 1.32741, 1.32491 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T17:55:19Z", - "avg_ns": 9492156893, - "stddev_ns": 14063378, - "avg_ts": 13.484837, - "stddev_ts": 0.019995, - "samples_ns": [ - 9501396145, - 9499102550, - 9475971984 - ], - "samples_ts": [ - 13.4717, - 13.475, - 13.5078 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T17:55:57Z", - "avg_ns": 76834633388, - "stddev_ns": 585966873, - "avg_ts": 2.022948, - "stddev_ts": 1.206876, - "samples_ns": [ - 37464933534, - 96428468584, - 96610498048 - ], - "samples_ts": [ - 3.41653, - 1.32741, - 1.32491 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 484 - }, - { - "timestamp_utc": "2025-12-09T18:12:22.171380+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:59:54Z\",\n \"avg_ns\": 9390429732,\n \"stddev_ns\": 18250645,\n \"avg_ts\": 13.630933,\n \"stddev_ts\": 0.026466,\n \"samples_ns\": [ 9411192901, 9383168309, 9376927988 ],\n \"samples_ts\": [ 13.6008, 13.6414, 13.6505 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:00:42Z\",\n \"avg_ns\": 233104902006,\n \"stddev_ns\": 3589804316,\n \"avg_ts\": 2.233108,\n \"stddev_ts\": 0.343064,\n \"samples_ns\": [ 274359465158, 223842550555, 201112690306 ],\n \"samples_ts\": [ 1.86616, 2.28732, 2.54584 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T17:59:54Z", - "avg_ns": 9390429732, - "stddev_ns": 18250645, - "avg_ts": 13.630933, - "stddev_ts": 0.026466, - "samples_ns": [ - 9411192901, - 9383168309, - 9376927988 - ], - "samples_ts": [ - 13.6008, - 13.6414, - 13.6505 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T18:00:42Z", - "avg_ns": 233104902006, - "stddev_ns": 3589804316, - "avg_ts": 2.233108, - "stddev_ts": 0.343064, - "samples_ns": [ - 274359465158, - 223842550555, - 201112690306 - ], - "samples_ts": [ - 1.86616, - 2.28732, - 2.54584 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 485 - }, - { - "timestamp_utc": "2025-12-09T18:20:58.504123+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:12:27Z\",\n \"avg_ns\": 77418637906,\n \"stddev_ns\": 875922054,\n \"avg_ts\": 8.242948,\n \"stddev_ts\": 4.109804,\n \"samples_ns\": [ 58391918587, 42437526219, 131426468912 ],\n \"samples_ts\": [ 8.76834, 12.0648, 3.89571 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:18:31Z\",\n \"avg_ns\": 48604859754,\n \"stddev_ns\": 80541632,\n \"avg_ts\": 3.873335,\n \"stddev_ts\": 2.200240,\n \"samples_ns\": [ 96044415060, 24881350343, 24888813861 ],\n \"samples_ts\": [ 1.33272, 5.14442, 5.14287 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T18:12:27Z", - "avg_ns": 77418637906, - "stddev_ns": 875922054, - "avg_ts": 8.242948, - "stddev_ts": 4.109804, - "samples_ns": [ - 58391918587, - 42437526219, - 131426468912 - ], - "samples_ts": [ - 8.76834, - 12.0648, - 3.89571 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T18:18:31Z", - "avg_ns": 48604859754, - "stddev_ns": 80541632, - "avg_ts": 3.873335, - "stddev_ts": 2.20024, - "samples_ns": [ - 96044415060, - 24881350343, - 24888813861 - ], - "samples_ts": [ - 1.33272, - 5.14442, - 5.14287 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 486 - }, - { - "timestamp_utc": "2025-12-09T18:39:37.404799+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:21:01Z\",\n \"avg_ns\": 77211911113,\n \"stddev_ns\": 1498695206,\n \"avg_ts\": 8.193771,\n \"stddev_ts\": 3.905395,\n \"samples_ns\": [ 131266843703, 55983319293, 44385570344 ],\n \"samples_ts\": [ 3.90045, 9.14558, 11.5353 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:26:23Z\",\n \"avg_ns\": 264345587564,\n \"stddev_ns\": 1855378714,\n \"avg_ts\": 1.939522,\n \"stddev_ts\": 0.088697,\n \"samples_ns\": [ 266996397033, 274704484995, 251335880666 ],\n \"samples_ts\": [ 1.91763, 1.86382, 2.03711 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T18:21:01Z", - "avg_ns": 77211911113, - "stddev_ns": 1498695206, - "avg_ts": 8.193771, - "stddev_ts": 3.905395, - "samples_ns": [ - 131266843703, - 55983319293, - 44385570344 - ], - "samples_ts": [ - 3.90045, - 9.14558, - 11.5353 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T18:26:23Z", - "avg_ns": 264345587564, - "stddev_ns": 1855378714, - "avg_ts": 1.939522, - "stddev_ts": 0.088697, - "samples_ns": [ - 266996397033, - 274704484995, - 251335880666 - ], - "samples_ts": [ - 1.91763, - 1.86382, - 2.03711 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 487 - }, - { - "timestamp_utc": "2025-12-09T18:43:52.257065+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:39:43Z\",\n \"avg_ns\": 9376953104,\n \"stddev_ns\": 12026545,\n \"avg_ts\": 13.650504,\n \"stddev_ts\": 0.017496,\n \"samples_ns\": [ 9390627829, 9372208550, 9368022934 ],\n \"samples_ts\": [ 13.6306, 13.6574, 13.6635 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:40:38Z\",\n \"avg_ns\": 64232482016,\n \"stddev_ns\": 4000223275,\n \"avg_ts\": 2.762520,\n \"stddev_ts\": 2.074908,\n \"samples_ns\": [ 24919518918, 68991680358, 98786246773 ],\n \"samples_ts\": [ 5.13654, 1.8553, 1.29573 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T18:39:43Z", - "avg_ns": 9376953104, - "stddev_ns": 12026545, - "avg_ts": 13.650504, - "stddev_ts": 0.017496, - "samples_ns": [ - 9390627829, - 9372208550, - 9368022934 - ], - "samples_ts": [ - 13.6306, - 13.6574, - 13.6635 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T18:40:38Z", - "avg_ns": 64232482016, - "stddev_ns": 4000223275, - "avg_ts": 2.76252, - "stddev_ts": 2.074908, - "samples_ns": [ - 24919518918, - 68991680358, - 98786246773 - ], - "samples_ts": [ - 5.13654, - 1.8553, - 1.29573 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 488 - }, - { - "timestamp_utc": "2025-12-09T18:57:03.756348+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:43:58Z\",\n \"avg_ns\": 17848818023,\n \"stddev_ns\": 4129014280,\n \"avg_ts\": 9.806760,\n \"stddev_ts\": 5.248467,\n \"samples_ns\": [ 33453298713, 10713355285, 9379800072 ],\n \"samples_ts\": [ 3.82623, 11.9477, 13.6463 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:45:26Z\",\n \"avg_ns\": 232127059417,\n \"stddev_ns\": 1490881217,\n \"avg_ts\": 2.216899,\n \"stddev_ts\": 0.191299,\n \"samples_ns\": [ 254144095733, 213789690590, 228447391929 ],\n \"samples_ts\": [ 2.01461, 2.39488, 2.24122 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T18:43:58Z", - "avg_ns": 17848818023, - "stddev_ns": 4129014280, - "avg_ts": 9.80676, - "stddev_ts": 5.248467, - "samples_ns": [ - 33453298713, - 10713355285, - 9379800072 - ], - "samples_ts": [ - 3.82623, - 11.9477, - 13.6463 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T18:45:26Z", - "avg_ns": 232127059417, - "stddev_ns": 1490881217, - "avg_ts": 2.216899, - "stddev_ts": 0.191299, - "samples_ns": [ - 254144095733, - 213789690590, - 228447391929 - ], - "samples_ts": [ - 2.01461, - 2.39488, - 2.24122 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 489 - }, - { - "timestamp_utc": "2025-12-09T19:05:54.597728+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:57:09Z\",\n \"avg_ns\": 79922230521,\n \"stddev_ns\": 468720982,\n \"avg_ts\": 8.000134,\n \"stddev_ts\": 4.476962,\n \"samples_ns\": [ 71116230829, 39927006030, 128723454706 ],\n \"samples_ts\": [ 7.19948, 12.8234, 3.97752 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:03:22Z\",\n \"avg_ns\": 50398860549,\n \"stddev_ns\": 2698727985,\n \"avg_ts\": 3.517375,\n \"stddev_ts\": 1.984883,\n \"samples_ns\": [ 94335062930, 32527526961, 24333991757 ],\n \"samples_ts\": [ 1.35687, 3.93513, 5.26013 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T18:57:09Z", - "avg_ns": 79922230521, - "stddev_ns": 468720982, - "avg_ts": 8.000134, - "stddev_ts": 4.476962, - "samples_ns": [ - 71116230829, - 39927006030, - 128723454706 - ], - "samples_ts": [ - 7.19948, - 12.8234, - 3.97752 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T19:03:22Z", - "avg_ns": 50398860549, - "stddev_ns": 2698727985, - "avg_ts": 3.517375, - "stddev_ts": 1.984883, - "samples_ns": [ - 94335062930, - 32527526961, - 24333991757 - ], - "samples_ts": [ - 1.35687, - 3.93513, - 5.26013 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 490 - }, - { - "timestamp_utc": "2025-12-09T19:24:07.217967+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:05:57Z\",\n \"avg_ns\": 79197851357,\n \"stddev_ns\": 2844425845,\n \"avg_ts\": 8.138231,\n \"stddev_ts\": 4.396187,\n \"samples_ns\": [ 132229507466, 64901930139, 40462116467 ],\n \"samples_ts\": [ 3.87206, 7.88883, 12.6538 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:11:16Z\",\n \"avg_ns\": 256720068835,\n \"stddev_ns\": 4170976943,\n \"avg_ts\": 2.008889,\n \"stddev_ts\": 0.215347,\n \"samples_ns\": [ 270476185599, 272870979199, 226813041709 ],\n \"samples_ts\": [ 1.89296, 1.87634, 2.25737 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T19:05:57Z", - "avg_ns": 79197851357, - "stddev_ns": 2844425845, - "avg_ts": 8.138231, - "stddev_ts": 4.396187, - "samples_ns": [ - 132229507466, - 64901930139, - 40462116467 - ], - "samples_ts": [ - 3.87206, - 7.88883, - 12.6538 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T19:11:16Z", - "avg_ns": 256720068835, - "stddev_ns": 4170976943, - "avg_ts": 2.008889, - "stddev_ts": 0.215347, - "samples_ns": [ - 270476185599, - 272870979199, - 226813041709 - ], - "samples_ts": [ - 1.89296, - 1.87634, - 2.25737 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 491 - }, - { - "timestamp_utc": "2025-12-09T19:28:10.277064+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:24:12Z\",\n \"avg_ns\": 14783775025,\n \"stddev_ns\": 3729756394,\n \"avg_ts\": 10.767081,\n \"stddev_ts\": 4.993970,\n \"samples_ns\": [ 25597226838, 9379038152, 9375060087 ],\n \"samples_ts\": [ 5.00054, 13.6475, 13.6532 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:25:30Z\",\n \"avg_ns\": 53099032416,\n \"stddev_ns\": 3877037844,\n \"avg_ts\": 3.259977,\n \"stddev_ts\": 1.954512,\n \"samples_ns\": [ 24336216780, 40425945317, 94534935152 ],\n \"samples_ts\": [ 5.25965, 3.16628, 1.354 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T19:24:12Z", - "avg_ns": 14783775025, - "stddev_ns": 3729756394, - "avg_ts": 10.767081, - "stddev_ts": 4.99397, - "samples_ns": [ - 25597226838, - 9379038152, - 9375060087 - ], - "samples_ts": [ - 5.00054, - 13.6475, - 13.6532 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T19:25:30Z", - "avg_ns": 53099032416, - "stddev_ns": 3877037844, - "avg_ts": 3.259977, - "stddev_ts": 1.954512, - "samples_ns": [ - 24336216780, - 40425945317, - 94534935152 - ], - "samples_ts": [ - 5.25965, - 3.16628, - 1.354 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 492 - }, - { - "timestamp_utc": "2025-12-09T19:41:03.697003+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:28:15Z\",\n \"avg_ns\": 28371889592,\n \"stddev_ns\": 4274429620,\n \"avg_ts\": 4.818602,\n \"stddev_ts\": 1.628739,\n \"samples_ns\": [ 33044980277, 32964216909, 19106471591 ],\n \"samples_ts\": [ 3.87351, 3.883, 6.6993 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:30:14Z\",\n \"avg_ns\": 216365484870,\n \"stddev_ns\": 2014020019,\n \"avg_ts\": 2.372804,\n \"stddev_ts\": 0.152272,\n \"samples_ns\": [ 217717690507, 202009017588, 229369746517 ],\n \"samples_ts\": [ 2.35167, 2.53454, 2.2322 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T19:28:15Z", - "avg_ns": 28371889592, - "stddev_ns": 4274429620, - "avg_ts": 4.818602, - "stddev_ts": 1.628739, - "samples_ns": [ - 33044980277, - 32964216909, - 19106471591 - ], - "samples_ts": [ - 3.87351, - 3.883, - 6.6993 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T19:30:14Z", - "avg_ns": 216365484870, - "stddev_ns": 2014020019, - "avg_ts": 2.372804, - "stddev_ts": 0.152272, - "samples_ns": [ - 217717690507, - 202009017588, - 229369746517 - ], - "samples_ts": [ - 2.35167, - 2.53454, - 2.2322 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 493 - }, - { - "timestamp_utc": "2025-12-09T19:50:30.806882+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:41:06Z\",\n \"avg_ns\": 78281596850,\n \"stddev_ns\": 1701006234,\n \"avg_ts\": 8.219809,\n \"stddev_ts\": 4.234294,\n \"samples_ns\": [ 133059129960, 41620670497, 60164990093 ],\n \"samples_ts\": [ 3.84791, 12.3016, 8.50993 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:46:45Z\",\n \"avg_ns\": 74931220448,\n \"stddev_ns\": 4219604440,\n \"avg_ts\": 2.172391,\n \"stddev_ts\": 1.447078,\n \"samples_ns\": [ 96266883012, 95222166628, 33304611705 ],\n \"samples_ts\": [ 1.32964, 1.34422, 3.84331 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T19:41:06Z", - "avg_ns": 78281596850, - "stddev_ns": 1701006234, - "avg_ts": 8.219809, - "stddev_ts": 4.234294, - "samples_ns": [ - 133059129960, - 41620670497, - 60164990093 - ], - "samples_ts": [ - 3.84791, - 12.3016, - 8.50993 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T19:46:45Z", - "avg_ns": 74931220448, - "stddev_ns": 4219604440, - "avg_ts": 2.172391, - "stddev_ts": 1.447078, - "samples_ns": [ - 96266883012, - 95222166628, - 33304611705 - ], - "samples_ts": [ - 1.32964, - 1.34422, - 3.84331 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 494 - }, - { - "timestamp_utc": "2025-12-09T20:08:03.194555+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:50:34Z\",\n \"avg_ns\": 92542093454,\n \"stddev_ns\": 1073967683,\n \"avg_ts\": 7.379262,\n \"stddev_ts\": 5.381145,\n \"samples_ns\": [ 115336545325, 124614811417, 37674923622 ],\n \"samples_ts\": [ 4.43918, 4.10866, 13.5899 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:55:49Z\",\n \"avg_ns\": 244360608485,\n \"stddev_ns\": 3669168908,\n \"avg_ts\": 2.129232,\n \"stddev_ts\": 0.340567,\n \"samples_ns\": [ 274909515854, 254417983939, 203754325663 ],\n \"samples_ts\": [ 1.86243, 2.01244, 2.51283 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T19:50:34Z", - "avg_ns": 92542093454, - "stddev_ns": 1073967683, - "avg_ts": 7.379262, - "stddev_ts": 5.381145, - "samples_ns": [ - 115336545325, - 124614811417, - 37674923622 - ], - "samples_ts": [ - 4.43918, - 4.10866, - 13.5899 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T19:55:49Z", - "avg_ns": 244360608485, - "stddev_ns": 3669168908, - "avg_ts": 2.129232, - "stddev_ts": 0.340567, - "samples_ns": [ - 274909515854, - 254417983939, - 203754325663 - ], - "samples_ts": [ - 1.86243, - 2.01244, - 2.51283 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 495 - }, - { - "timestamp_utc": "2025-12-09T20:12:00.299282+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:08:08Z\",\n \"avg_ns\": 30031285329,\n \"stddev_ns\": 4165551206,\n \"avg_ts\": 4.357233,\n \"stddev_ts\": 0.830235,\n \"samples_ns\": [ 33017616804, 32997554206, 24078684979 ],\n \"samples_ts\": [ 3.87672, 3.87908, 5.3159 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:10:12Z\",\n \"avg_ns\": 35817319284,\n \"stddev_ns\": 3864848508,\n \"avg_ts\": 4.228237,\n \"stddev_ts\": 1.773684,\n \"samples_ns\": [ 24356307570, 24384514989, 58711135293 ],\n \"samples_ts\": [ 5.25531, 5.24923, 2.18017 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T20:08:08Z", - "avg_ns": 30031285329, - "stddev_ns": 4165551206, - "avg_ts": 4.357233, - "stddev_ts": 0.830235, - "samples_ns": [ - 33017616804, - 32997554206, - 24078684979 - ], - "samples_ts": [ - 3.87672, - 3.87908, - 5.3159 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T20:10:12Z", - "avg_ns": 35817319284, - "stddev_ns": 3864848508, - "avg_ts": 4.228237, - "stddev_ts": 1.773684, - "samples_ns": [ - 24356307570, - 24384514989, - 58711135293 - ], - "samples_ts": [ - 5.25531, - 5.24923, - 2.18017 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 496 - }, - { - "timestamp_utc": "2025-12-09T20:25:46.237906+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:12:06Z\",\n \"avg_ns\": 33163255295,\n \"stddev_ns\": 140077894,\n \"avg_ts\": 3.859739,\n \"stddev_ts\": 0.016286,\n \"samples_ns\": [ 33036790023, 33139156850, 33313819012 ],\n \"samples_ts\": [ 3.87447, 3.8625, 3.84225 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:14:19Z\",\n \"avg_ns\": 228731772895,\n \"stddev_ns\": 2013636374,\n \"avg_ts\": 2.279634,\n \"stddev_ts\": 0.358818,\n \"samples_ns\": [ 202579911738, 209548730274, 274066676674 ],\n \"samples_ts\": [ 2.5274, 2.44335, 1.86816 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T20:12:06Z", - "avg_ns": 33163255295, - "stddev_ns": 140077894, - "avg_ts": 3.859739, - "stddev_ts": 0.016286, - "samples_ns": [ - 33036790023, - 33139156850, - 33313819012 - ], - "samples_ts": [ - 3.87447, - 3.8625, - 3.84225 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T20:14:19Z", - "avg_ns": 228731772895, - "stddev_ns": 2013636374, - "avg_ts": 2.279634, - "stddev_ts": 0.358818, - "samples_ns": [ - 202579911738, - 209548730274, - 274066676674 - ], - "samples_ts": [ - 2.5274, - 2.44335, - 1.86816 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 497 - }, - { - "timestamp_utc": "2025-12-09T20:35:13.417223+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:25:49Z\",\n \"avg_ns\": 85449466866,\n \"stddev_ns\": 690564885,\n \"avg_ts\": 7.775105,\n \"stddev_ts\": 5.087227,\n \"samples_ns\": [ 131926948471, 86582204197, 37839247931 ],\n \"samples_ts\": [ 3.88094, 5.91346, 13.5309 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:31:04Z\",\n \"avg_ns\": 82746540693,\n \"stddev_ns\": 3949330913,\n \"avg_ts\": 1.572126,\n \"stddev_ts\": 0.246150,\n \"samples_ns\": [ 69911559036, 95458362429, 82869700615 ],\n \"samples_ts\": [ 1.83088, 1.3409, 1.54459 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T20:25:49Z", - "avg_ns": 85449466866, - "stddev_ns": 690564885, - "avg_ts": 7.775105, - "stddev_ts": 5.087227, - "samples_ns": [ - 131926948471, - 86582204197, - 37839247931 - ], - "samples_ts": [ - 3.88094, - 5.91346, - 13.5309 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T20:31:04Z", - "avg_ns": 82746540693, - "stddev_ns": 3949330913, - "avg_ts": 1.572126, - "stddev_ts": 0.24615, - "samples_ns": [ - 69911559036, - 95458362429, - 82869700615 - ], - "samples_ts": [ - 1.83088, - 1.3409, - 1.54459 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 498 - }, - { - "timestamp_utc": "2025-12-09T20:51:19.048898+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:35:16Z\",\n \"avg_ns\": 92396363557,\n \"stddev_ns\": 1348038903,\n \"avg_ts\": 5.991810,\n \"stddev_ts\": 1.831636,\n \"samples_ns\": [ 70912348818, 131735397320, 74541344533 ],\n \"samples_ts\": [ 7.22018, 3.88658, 6.86867 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:40:31Z\",\n \"avg_ns\": 215603347158,\n \"stddev_ns\": 3731021205,\n \"avg_ts\": 2.420212,\n \"stddev_ts\": 0.387137,\n \"samples_ns\": [ 259472237799, 194097282745, 193240520931 ],\n \"samples_ts\": [ 1.97324, 2.63785, 2.64955 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T20:35:16Z", - "avg_ns": 92396363557, - "stddev_ns": 1348038903, - "avg_ts": 5.99181, - "stddev_ts": 1.831636, - "samples_ns": [ - 70912348818, - 131735397320, - 74541344533 - ], - "samples_ts": [ - 7.22018, - 3.88658, - 6.86867 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T20:40:31Z", - "avg_ns": 215603347158, - "stddev_ns": 3731021205, - "avg_ts": 2.420212, - "stddev_ts": 0.387137, - "samples_ns": [ - 259472237799, - 194097282745, - 193240520931 - ], - "samples_ts": [ - 1.97324, - 2.63785, - 2.64955 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 499 - }, - { - "timestamp_utc": "2025-12-09T20:55:58.935657+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:51:24Z\",\n \"avg_ns\": 32955954123,\n \"stddev_ns\": 49572347,\n \"avg_ts\": 3.883978,\n \"stddev_ts\": 0.005838,\n \"samples_ns\": [ 33012026692, 32917953654, 32937882024 ],\n \"samples_ts\": [ 3.87737, 3.88846, 3.8861 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:53:36Z\",\n \"avg_ns\": 47054307753,\n \"stddev_ns\": 1545861244,\n \"avg_ts\": 3.935277,\n \"stddev_ts\": 2.203384,\n \"samples_ns\": [ 91998680848, 24740854720, 24423387692 ],\n \"samples_ts\": [ 1.39132, 5.17363, 5.24088 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T20:51:24Z", - "avg_ns": 32955954123, - "stddev_ns": 49572347, - "avg_ts": 3.883978, - "stddev_ts": 0.005838, - "samples_ns": [ - 33012026692, - 32917953654, - 32937882024 - ], - "samples_ts": [ - 3.87737, - 3.88846, - 3.8861 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T20:53:36Z", - "avg_ns": 47054307753, - "stddev_ns": 1545861244, - "avg_ts": 3.935277, - "stddev_ts": 2.203384, - "samples_ns": [ - 91998680848, - 24740854720, - 24423387692 - ], - "samples_ts": [ - 1.39132, - 5.17363, - 5.24088 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 500 - }, - { - "timestamp_utc": "2025-12-09T21:10:14.354033+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:56:02Z\",\n \"avg_ns\": 27211918123,\n \"stddev_ns\": 4153246018,\n \"avg_ts\": 5.386227,\n \"stddev_ts\": 2.662072,\n \"samples_ns\": [ 15129804160, 33261006338, 33244943871 ],\n \"samples_ts\": [ 8.46012, 3.84835, 3.85021 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:57:33Z\",\n \"avg_ns\": 253349867797,\n \"stddev_ns\": 1488132863,\n \"avg_ts\": 2.047774,\n \"stddev_ts\": 0.299061,\n \"samples_ns\": [ 213948686743, 272910406719, 273190509930 ],\n \"samples_ts\": [ 2.3931, 1.87607, 1.87415 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T20:56:02Z", - "avg_ns": 27211918123, - "stddev_ns": 4153246018, - "avg_ts": 5.386227, - "stddev_ts": 2.662072, - "samples_ns": [ - 15129804160, - 33261006338, - 33244943871 - ], - "samples_ts": [ - 8.46012, - 3.84835, - 3.85021 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T20:57:33Z", - "avg_ns": 253349867797, - "stddev_ns": 1488132863, - "avg_ts": 2.047774, - "stddev_ts": 0.299061, - "samples_ns": [ - 213948686743, - 272910406719, - 273190509930 - ], - "samples_ts": [ - 2.3931, - 1.87607, - 1.87415 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 501 - }, - { - "timestamp_utc": "2025-12-09T21:18:15.233033+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:10:17Z\",\n \"avg_ns\": 92528564756,\n \"stddev_ns\": 2033396132,\n \"avg_ts\": 5.975163,\n \"stddev_ts\": 1.810598,\n \"samples_ns\": [ 74362581688, 131617066228, 71606046354 ],\n \"samples_ts\": [ 6.88518, 3.89007, 7.15023 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:15:33Z\",\n \"avg_ns\": 53807820311,\n \"stddev_ns\": 4278952693,\n \"avg_ts\": 3.173183,\n \"stddev_ts\": 1.991954,\n \"samples_ns\": [ 24028155857, 45794456831, 91600848245 ],\n \"samples_ts\": [ 5.32708, 2.7951, 1.39737 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T21:10:17Z", - "avg_ns": 92528564756, - "stddev_ns": 2033396132, - "avg_ts": 5.975163, - "stddev_ts": 1.810598, - "samples_ns": [ - 74362581688, - 131617066228, - 71606046354 - ], - "samples_ts": [ - 6.88518, - 3.89007, - 7.15023 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T21:15:33Z", - "avg_ns": 53807820311, - "stddev_ns": 4278952693, - "avg_ts": 3.173183, - "stddev_ts": 1.991954, - "samples_ns": [ - 24028155857, - 45794456831, - 91600848245 - ], - "samples_ts": [ - 5.32708, - 2.7951, - 1.39737 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 502 - }, - { - "timestamp_utc": "2025-12-09T21:35:37.730578+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:18:21Z\",\n \"avg_ns\": 83878351141,\n \"stddev_ns\": 727625177,\n \"avg_ts\": 7.871555,\n \"stddev_ts\": 5.000214,\n \"samples_ns\": [ 37972857018, 82027747826, 131634448581 ],\n \"samples_ts\": [ 13.4833, 6.24179, 3.88956 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:24:27Z\",\n \"avg_ns\": 223079286511,\n \"stddev_ns\": 1995653408,\n \"avg_ts\": 2.349857,\n \"stddev_ts\": 0.426365,\n \"samples_ns\": [ 187498120669, 210730760464, 271008978402 ],\n \"samples_ts\": [ 2.73069, 2.42964, 1.88924 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T21:18:21Z", - "avg_ns": 83878351141, - "stddev_ns": 727625177, - "avg_ts": 7.871555, - "stddev_ts": 5.000214, - "samples_ns": [ - 37972857018, - 82027747826, - 131634448581 - ], - "samples_ts": [ - 13.4833, - 6.24179, - 3.88956 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_type": "gemma3 4B Q4_K - Medium", - "model_size": 2483352832, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T21:24:27Z", - "avg_ns": 223079286511, - "stddev_ns": 1995653408, - "avg_ts": 2.349857, - "stddev_ts": 0.426365, - "samples_ns": [ - 187498120669, - 210730760464, - 271008978402 - ], - "samples_ts": [ - 2.73069, - 2.42964, - 1.88924 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q4_K_M", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 503 - }, - { - "timestamp_utc": "2025-12-09T21:42:12.894717+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:36:09Z\",\n \"avg_ns\": 32167273620,\n \"stddev_ns\": 4155856856,\n \"avg_ts\": 4.211321,\n \"stddev_ts\": 1.188841,\n \"samples_ns\": [ 23765347685, 30194594805, 42541878372 ],\n \"samples_ts\": [ 5.38599, 4.23917, 3.0088 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:38:10Z\",\n \"avg_ns\": 80514241765,\n \"stddev_ns\": 1201796443,\n \"avg_ts\": 1.708572,\n \"stddev_ts\": 0.503315,\n \"samples_ns\": [ 113419398211, 65081008065, 63042319021 ],\n \"samples_ts\": [ 1.12855, 1.96678, 2.03038 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T21:36:09Z", - "avg_ns": 32167273620, - "stddev_ns": 4155856856, - "avg_ts": 4.211321, - "stddev_ts": 1.188841, - "samples_ns": [ - 23765347685, - 30194594805, - 42541878372 - ], - "samples_ts": [ - 5.38599, - 4.23917, - 3.0088 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T21:38:10Z", - "avg_ns": 80514241765, - "stddev_ns": 1201796443, - "avg_ts": 1.708572, - "stddev_ts": 0.503315, - "samples_ns": [ - 113419398211, - 65081008065, - 63042319021 - ], - "samples_ts": [ - 1.12855, - 1.96678, - 2.03038 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 504 - }, - { - "timestamp_utc": "2025-12-09T22:01:28.548347+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:42:13Z\",\n \"avg_ns\": 42624695862,\n \"stddev_ns\": 38116458,\n \"avg_ts\": 3.002956,\n \"stddev_ts\": 0.002685,\n \"samples_ns\": [ 42586208835, 42625449288, 42662429464 ],\n \"samples_ts\": [ 3.00567, 3.0029, 3.0003 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:45:01Z\",\n \"avg_ns\": 328679723871,\n \"stddev_ns\": 2035137287,\n \"avg_ts\": 1.557787,\n \"stddev_ts\": 0.009614,\n \"samples_ns\": [ 331005016768, 327222895240, 327811259606 ],\n \"samples_ts\": [ 1.5468, 1.56468, 1.56187 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T21:42:13Z", - "avg_ns": 42624695862, - "stddev_ns": 38116458, - "avg_ts": 3.002956, - "stddev_ts": 0.002685, - "samples_ns": [ - 42586208835, - 42625449288, - 42662429464 - ], - "samples_ts": [ - 3.00567, - 3.0029, - 3.0003 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T21:45:01Z", - "avg_ns": 328679723871, - "stddev_ns": 2035137287, - "avg_ts": 1.557787, - "stddev_ts": 0.009614, - "samples_ns": [ - 331005016768, - 327222895240, - 327811259606 - ], - "samples_ts": [ - 1.5468, - 1.56468, - 1.56187 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 505 - }, - { - "timestamp_utc": "2025-12-09T22:14:26.122584+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:01:29Z\",\n \"avg_ns\": 133340041714,\n \"stddev_ns\": 1398337842,\n \"avg_ts\": 3.951586,\n \"stddev_ts\": 0.827560,\n \"samples_ns\": [ 133834043058, 105896679459, 160289402627 ],\n \"samples_ts\": [ 3.82563, 4.8349, 3.19422 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:10:22Z\",\n \"avg_ns\": 81063292196,\n \"stddev_ns\": 3636174179,\n \"avg_ts\": 1.684517,\n \"stddev_ts\": 0.478742,\n \"samples_ns\": [ 62265637867, 69051665064, 111872573658 ],\n \"samples_ts\": [ 2.05571, 1.85368, 1.14416 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T22:01:29Z", - "avg_ns": 133340041714, - "stddev_ns": 1398337842, - "avg_ts": 3.951586, - "stddev_ts": 0.82756, - "samples_ns": [ - 133834043058, - 105896679459, - 160289402627 - ], - "samples_ts": [ - 3.82563, - 4.8349, - 3.19422 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T22:10:22Z", - "avg_ns": 81063292196, - "stddev_ns": 3636174179, - "avg_ts": 1.684517, - "stddev_ts": 0.478742, - "samples_ns": [ - 62265637867, - 69051665064, - 111872573658 - ], - "samples_ts": [ - 2.05571, - 1.85368, - 1.14416 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 506 - }, - { - "timestamp_utc": "2025-12-09T22:39:29.180211+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:14:27Z\",\n \"avg_ns\": 121784486920,\n \"stddev_ns\": 1488752240,\n \"avg_ts\": 4.291397,\n \"stddev_ts\": 0.774231,\n \"samples_ns\": [ 126057038775, 139965702777, 99330719208 ],\n \"samples_ts\": [ 4.06165, 3.65804, 5.1545 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:22:27Z\",\n \"avg_ns\": 340308016665,\n \"stddev_ns\": 861020332,\n \"avg_ts\": 1.504526,\n \"stddev_ts\": 0.003808,\n \"samples_ns\": [ 340371112920, 339417184057, 341135753019 ],\n \"samples_ts\": [ 1.50424, 1.50847, 1.50087 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T22:14:27Z", - "avg_ns": 121784486920, - "stddev_ns": 1488752240, - "avg_ts": 4.291397, - "stddev_ts": 0.774231, - "samples_ns": [ - 126057038775, - 139965702777, - 99330719208 - ], - "samples_ts": [ - 4.06165, - 3.65804, - 5.1545 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T22:22:27Z", - "avg_ns": 340308016665, - "stddev_ns": 861020332, - "avg_ts": 1.504526, - "stddev_ts": 0.003808, - "samples_ns": [ - 340371112920, - 339417184057, - 341135753019 - ], - "samples_ts": [ - 1.50424, - 1.50847, - 1.50087 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 507 - }, - { - "timestamp_utc": "2025-12-09T22:45:46.526339+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:39:30Z\",\n \"avg_ns\": 23854739967,\n \"stddev_ns\": 56026440,\n \"avg_ts\": 5.365830,\n \"stddev_ts\": 0.012613,\n \"samples_ns\": [ 23869593834, 23792783810, 23901842259 ],\n \"samples_ts\": [ 5.36247, 5.37978, 5.35524 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:41:23Z\",\n \"avg_ns\": 87613502797,\n \"stddev_ns\": 3924213930,\n \"avg_ts\": 1.521716,\n \"stddev_ts\": 0.361709,\n \"samples_ns\": [ 68775371327, 112160635260, 81904501804 ],\n \"samples_ts\": [ 1.86113, 1.14122, 1.5628 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T22:39:30Z", - "avg_ns": 23854739967, - "stddev_ns": 56026440, - "avg_ts": 5.36583, - "stddev_ts": 0.012613, - "samples_ns": [ - 23869593834, - 23792783810, - 23901842259 - ], - "samples_ts": [ - 5.36247, - 5.37978, - 5.35524 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T22:41:23Z", - "avg_ns": 87613502797, - "stddev_ns": 3924213930, - "avg_ts": 1.521716, - "stddev_ts": 0.361709, - "samples_ns": [ - 68775371327, - 112160635260, - 81904501804 - ], - "samples_ts": [ - 1.86113, - 1.14122, - 1.5628 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 508 - }, - { - "timestamp_utc": "2025-12-09T23:04:51.079634+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:45:47Z\",\n \"avg_ns\": 25102578225,\n \"stddev_ns\": 2073163681,\n \"avg_ts\": 5.121306,\n \"stddev_ts\": 0.403711,\n \"samples_ns\": [ 23918216577, 23893100580, 27496417519 ],\n \"samples_ts\": [ 5.35157, 5.3572, 4.65515 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:47:26Z\",\n \"avg_ns\": 347704207570,\n \"stddev_ns\": 555738711,\n \"avg_ts\": 1.472518,\n \"stddev_ts\": 0.002352,\n \"samples_ns\": [ 347210673520, 348306169294, 347595779897 ],\n \"samples_ts\": [ 1.47461, 1.46997, 1.47298 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T22:45:47Z", - "avg_ns": 25102578225, - "stddev_ns": 2073163681, - "avg_ts": 5.121306, - "stddev_ts": 0.403711, - "samples_ns": [ - 23918216577, - 23893100580, - 27496417519 - ], - "samples_ts": [ - 5.35157, - 5.3572, - 4.65515 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T22:47:26Z", - "avg_ns": 347704207570, - "stddev_ns": 555738711, - "avg_ts": 1.472518, - "stddev_ts": 0.002352, - "samples_ns": [ - 347210673520, - 348306169294, - 347595779897 - ], - "samples_ts": [ - 1.47461, - 1.46997, - 1.47298 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 509 - }, - { - "timestamp_utc": "2025-12-09T23:17:00.834239+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:04:52Z\",\n \"avg_ns\": 125417612509,\n \"stddev_ns\": 1252279076,\n \"avg_ts\": 4.116251,\n \"stddev_ts\": 0.467935,\n \"samples_ns\": [ 136895562869, 129018237698, 110339036962 ],\n \"samples_ts\": [ 3.74008, 3.96843, 4.64024 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:12:52Z\",\n \"avg_ns\": 82538611552,\n \"stddev_ns\": 3352829829,\n \"avg_ts\": 1.650715,\n \"stddev_ts\": 0.470406,\n \"samples_ns\": [ 112490604143, 73053787752, 62071442763 ],\n \"samples_ts\": [ 1.13787, 1.75213, 2.06214 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T23:04:52Z", - "avg_ns": 125417612509, - "stddev_ns": 1252279076, - "avg_ts": 4.116251, - "stddev_ts": 0.467935, - "samples_ns": [ - 136895562869, - 129018237698, - 110339036962 - ], - "samples_ts": [ - 3.74008, - 3.96843, - 4.64024 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T23:12:52Z", - "avg_ns": 82538611552, - "stddev_ns": 3352829829, - "avg_ts": 1.650715, - "stddev_ts": 0.470406, - "samples_ns": [ - 112490604143, - 73053787752, - 62071442763 - ], - "samples_ts": [ - 1.13787, - 1.75213, - 2.06214 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 510 - }, - { - "timestamp_utc": "2025-12-09T23:42:12.177911+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:17:01Z\",\n \"avg_ns\": 124649048767,\n \"stddev_ns\": 4288927930,\n \"avg_ts\": 4.145751,\n \"stddev_ts\": 0.502027,\n \"samples_ns\": [ 108528861588, 129886551837, 135531732878 ],\n \"samples_ts\": [ 4.71764, 3.9419, 3.77771 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:25:52Z\",\n \"avg_ns\": 326302174206,\n \"stddev_ns\": 206725927,\n \"avg_ts\": 1.569098,\n \"stddev_ts\": 0.000994,\n \"samples_ns\": [ 326423586293, 326419454920, 326063481407 ],\n \"samples_ts\": [ 1.56851, 1.56853, 1.57025 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T23:17:01Z", - "avg_ns": 124649048767, - "stddev_ns": 4288927930, - "avg_ts": 4.145751, - "stddev_ts": 0.502027, - "samples_ns": [ - 108528861588, - 129886551837, - 135531732878 - ], - "samples_ts": [ - 4.71764, - 3.9419, - 3.77771 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T23:25:52Z", - "avg_ns": 326302174206, - "stddev_ns": 206725927, - "avg_ts": 1.569098, - "stddev_ts": 0.000994, - "samples_ns": [ - 326423586293, - 326419454920, - 326063481407 - ], - "samples_ts": [ - 1.56851, - 1.56853, - 1.57025 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 511 - }, - { - "timestamp_utc": "2025-12-09T23:48:52.536933+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:42:13Z\",\n \"avg_ns\": 42340167773,\n \"stddev_ns\": 36893067,\n \"avg_ts\": 3.023136,\n \"stddev_ts\": 0.002633,\n \"samples_ns\": [ 42381078284, 42329996554, 42309428483 ],\n \"samples_ts\": [ 3.02022, 3.02386, 3.02533 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:44:58Z\",\n \"avg_ns\": 77628223858,\n \"stddev_ns\": 2443179006,\n \"avg_ts\": 1.735322,\n \"stddev_ts\": 0.441044,\n \"samples_ns\": [ 66657237173, 62324151407, 103903282996 ],\n \"samples_ts\": [ 1.92027, 2.05378, 1.23191 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T23:42:13Z", - "avg_ns": 42340167773, - "stddev_ns": 36893067, - "avg_ts": 3.023136, - "stddev_ts": 0.002633, - "samples_ns": [ - 42381078284, - 42329996554, - 42309428483 - ], - "samples_ts": [ - 3.02022, - 3.02386, - 3.02533 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-09T23:44:58Z", - "avg_ns": 77628223858, - "stddev_ns": 2443179006, - "avg_ts": 1.735322, - "stddev_ts": 0.441044, - "samples_ns": [ - 66657237173, - 62324151407, - 103903282996 - ], - "samples_ts": [ - 1.92027, - 2.05378, - 1.23191 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 512 - }, - { - "timestamp_utc": "2025-12-10T00:07:41.619978+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:48:54Z\",\n \"avg_ns\": 28551405340,\n \"stddev_ns\": 1453749642,\n \"avg_ts\": 4.703262,\n \"stddev_ts\": 1.154490,\n \"samples_ns\": [ 37980215115, 23824673631, 23849327274 ],\n \"samples_ts\": [ 3.37018, 5.37258, 5.36703 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:51:02Z\",\n \"avg_ns\": 332794221439,\n \"stddev_ns\": 4076512577,\n \"avg_ts\": 1.539562,\n \"stddev_ts\": 0.049340,\n \"samples_ns\": [ 326392884829, 326649184096, 345340595393 ],\n \"samples_ts\": [ 1.56866, 1.56743, 1.48259 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-09T23:48:54Z", - "avg_ns": 28551405340, - "stddev_ns": 1453749642, - "avg_ts": 4.703262, - "stddev_ts": 1.15449, - "samples_ns": [ - 37980215115, - 23824673631, - 23849327274 - ], - "samples_ts": [ - 3.37018, - 5.37258, - 5.36703 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-09T23:51:02Z", - "avg_ns": 332794221439, - "stddev_ns": 4076512577, - "avg_ts": 1.539562, - "stddev_ts": 0.04934, - "samples_ns": [ - 326392884829, - 326649184096, - 345340595393 - ], - "samples_ts": [ - 1.56866, - 1.56743, - 1.48259 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 513 - }, - { - "timestamp_utc": "2025-12-10T00:20:53.068901+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:07:43Z\",\n \"avg_ns\": 121308078999,\n \"stddev_ns\": 4148934167,\n \"avg_ts\": 4.402918,\n \"stddev_ts\": 1.047708,\n \"samples_ns\": [ 96358077372, 157159582945, 110406576681 ],\n \"samples_ts\": [ 5.31351, 3.25784, 4.6374 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:16:28Z\",\n \"avg_ns\": 88072244241,\n \"stddev_ns\": 4043527556,\n \"avg_ts\": 1.507985,\n \"stddev_ts\": 0.330770,\n \"samples_ns\": [ 73111164567, 113149468089, 77956100067 ],\n \"samples_ts\": [ 1.75076, 1.13125, 1.64195 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T00:07:43Z", - "avg_ns": 121308078999, - "stddev_ns": 4148934167, - "avg_ts": 4.402918, - "stddev_ts": 1.047708, - "samples_ns": [ - 96358077372, - 157159582945, - 110406576681 - ], - "samples_ts": [ - 5.31351, - 3.25784, - 4.6374 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T00:16:28Z", - "avg_ns": 88072244241, - "stddev_ns": 4043527556, - "avg_ts": 1.507985, - "stddev_ts": 0.33077, - "samples_ns": [ - 73111164567, - 113149468089, - 77956100067 - ], - "samples_ts": [ - 1.75076, - 1.13125, - 1.64195 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 514 - }, - { - "timestamp_utc": "2025-12-10T00:45:49.886674+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:20:54Z\",\n \"avg_ns\": 137871627848,\n \"stddev_ns\": 1760972080,\n \"avg_ts\": 3.925765,\n \"stddev_ts\": 1.211666,\n \"samples_ns\": [ 161810900842, 96186620615, 155617362089 ],\n \"samples_ts\": [ 3.16419, 5.32299, 3.29012 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:29:33Z\",\n \"avg_ns\": 325041720096,\n \"stddev_ns\": 1167182090,\n \"avg_ts\": 1.575918,\n \"stddev_ts\": 0.041392,\n \"samples_ns\": [ 335051667269, 320033882375, 320039610645 ],\n \"samples_ts\": [ 1.52812, 1.59983, 1.5998 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T00:20:54Z", - "avg_ns": 137871627848, - "stddev_ns": 1760972080, - "avg_ts": 3.925765, - "stddev_ts": 1.211666, - "samples_ns": [ - 161810900842, - 96186620615, - 155617362089 - ], - "samples_ts": [ - 3.16419, - 5.32299, - 3.29012 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T00:29:33Z", - "avg_ns": 325041720096, - "stddev_ns": 1167182090, - "avg_ts": 1.575918, - "stddev_ts": 0.041392, - "samples_ns": [ - 335051667269, - 320033882375, - 320039610645 - ], - "samples_ts": [ - 1.52812, - 1.59983, - 1.5998 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 515 - }, - { - "timestamp_utc": "2025-12-10T00:51:45.225454+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:45:50Z\",\n \"avg_ns\": 25688686136,\n \"stddev_ns\": 1310123860,\n \"avg_ts\": 5.034529,\n \"stddev_ts\": 0.603370,\n \"samples_ns\": [ 23767391764, 23790790431, 29507876215 ],\n \"samples_ts\": [ 5.38553, 5.38023, 4.33782 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:47:31Z\",\n \"avg_ns\": 84160683209,\n \"stddev_ns\": 3348636954,\n \"avg_ts\": 1.610573,\n \"stddev_ts\": 0.468183,\n \"samples_ns\": [ 109848010578, 81643204168, 60990834881 ],\n \"samples_ts\": [ 1.16525, 1.5678, 2.09868 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T00:45:50Z", - "avg_ns": 25688686136, - "stddev_ns": 1310123860, - "avg_ts": 5.034529, - "stddev_ts": 0.60337, - "samples_ns": [ - 23767391764, - 23790790431, - 29507876215 - ], - "samples_ts": [ - 5.38553, - 5.38023, - 4.33782 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T00:47:31Z", - "avg_ns": 84160683209, - "stddev_ns": 3348636954, - "avg_ts": 1.610573, - "stddev_ts": 0.468183, - "samples_ns": [ - 109848010578, - 81643204168, - 60990834881 - ], - "samples_ts": [ - 1.16525, - 1.5678, - 2.09868 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 516 - }, - { - "timestamp_utc": "2025-12-10T01:10:47.864724+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:51:46Z\",\n \"avg_ns\": 40334066025,\n \"stddev_ns\": 3908023539,\n \"avg_ts\": 3.194675,\n \"stddev_ts\": 0.327878,\n \"samples_ns\": [ 35821473131, 42595552216, 42585172730 ],\n \"samples_ts\": [ 3.57328, 3.00501, 3.00574 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:54:11Z\",\n \"avg_ns\": 331901987606,\n \"stddev_ns\": 1555334218,\n \"avg_ts\": 1.542988,\n \"stddev_ts\": 0.028905,\n \"samples_ns\": [ 338958340183, 329776832668, 326970789967 ],\n \"samples_ts\": [ 1.51051, 1.55257, 1.56589 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T00:51:46Z", - "avg_ns": 40334066025, - "stddev_ns": 3908023539, - "avg_ts": 3.194675, - "stddev_ts": 0.327878, - "samples_ns": [ - 35821473131, - 42595552216, - 42585172730 - ], - "samples_ts": [ - 3.57328, - 3.00501, - 3.00574 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T00:54:11Z", - "avg_ns": 331901987606, - "stddev_ns": 1555334218, - "avg_ts": 1.542988, - "stddev_ts": 0.028905, - "samples_ns": [ - 338958340183, - 329776832668, - 326970789967 - ], - "samples_ts": [ - 1.51051, - 1.55257, - 1.56589 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 517 - }, - { - "timestamp_utc": "2025-12-10T01:23:28.897251+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:10:48Z\",\n \"avg_ns\": 137011934606,\n \"stddev_ns\": 3564624128,\n \"avg_ts\": 3.953170,\n \"stddev_ts\": 1.218164,\n \"samples_ns\": [ 165677108392, 95760897218, 149597798210 ],\n \"samples_ts\": [ 3.09035, 5.34665, 3.42251 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:19:20Z\",\n \"avg_ns\": 82372511743,\n \"stddev_ns\": 1992511151,\n \"avg_ts\": 1.596743,\n \"stddev_ts\": 0.333868,\n \"samples_ns\": [ 86193543157, 64904009833, 96019982241 ],\n \"samples_ts\": [ 1.48503, 1.97214, 1.33306 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T01:10:48Z", - "avg_ns": 137011934606, - "stddev_ns": 3564624128, - "avg_ts": 3.95317, - "stddev_ts": 1.218164, - "samples_ns": [ - 165677108392, - 95760897218, - 149597798210 - ], - "samples_ts": [ - 3.09035, - 5.34665, - 3.42251 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T01:19:20Z", - "avg_ns": 82372511743, - "stddev_ns": 1992511151, - "avg_ts": 1.596743, - "stddev_ts": 0.333868, - "samples_ns": [ - 86193543157, - 64904009833, - 96019982241 - ], - "samples_ts": [ - 1.48503, - 1.97214, - 1.33306 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 518 - }, - { - "timestamp_utc": "2025-12-10T01:50:00.836180+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:23:30Z\",\n \"avg_ns\": 121917489901,\n \"stddev_ns\": 2396736246,\n \"avg_ts\": 4.411770,\n \"stddev_ts\": 1.130201,\n \"samples_ns\": [ 110271628447, 160813669190, 94667172068 ],\n \"samples_ts\": [ 4.64308, 3.18381, 5.40842 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:31:53Z\",\n \"avg_ns\": 362006181259,\n \"stddev_ns\": 4287783453,\n \"avg_ts\": 1.415287,\n \"stddev_ts\": 0.045232,\n \"samples_ns\": [ 348894430456, 368205623569, 368918489752 ],\n \"samples_ts\": [ 1.46749, 1.39053, 1.38784 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T01:23:30Z", - "avg_ns": 121917489901, - "stddev_ns": 2396736246, - "avg_ts": 4.41177, - "stddev_ts": 1.130201, - "samples_ns": [ - 110271628447, - 160813669190, - 94667172068 - ], - "samples_ts": [ - 4.64308, - 3.18381, - 5.40842 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T01:31:53Z", - "avg_ns": 362006181259, - "stddev_ns": 4287783453, - "avg_ts": 1.415287, - "stddev_ts": 0.045232, - "samples_ns": [ - 348894430456, - 368205623569, - 368918489752 - ], - "samples_ts": [ - 1.46749, - 1.39053, - 1.38784 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 519 - }, - { - "timestamp_utc": "2025-12-10T01:56:09.921465+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:50:02Z\",\n \"avg_ns\": 23914529604,\n \"stddev_ns\": 67406711,\n \"avg_ts\": 5.352423,\n \"stddev_ts\": 0.015079,\n \"samples_ns\": [ 23985460293, 23851310972, 23906817549 ],\n \"samples_ts\": [ 5.33657, 5.36658, 5.35412 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:51:41Z\",\n \"avg_ns\": 89360823705,\n \"stddev_ns\": 4183215369,\n \"avg_ts\": 1.505041,\n \"stddev_ts\": 0.402822,\n \"samples_ns\": [ 85817048200, 115405184971, 66860237945 ],\n \"samples_ts\": [ 1.49155, 1.10914, 1.91444 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T01:50:02Z", - "avg_ns": 23914529604, - "stddev_ns": 67406711, - "avg_ts": 5.352423, - "stddev_ts": 0.015079, - "samples_ns": [ - 23985460293, - 23851310972, - 23906817549 - ], - "samples_ts": [ - 5.33657, - 5.36658, - 5.35412 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T01:51:41Z", - "avg_ns": 89360823705, - "stddev_ns": 4183215369, - "avg_ts": 1.505041, - "stddev_ts": 0.402822, - "samples_ns": [ - 85817048200, - 115405184971, - 66860237945 - ], - "samples_ts": [ - 1.49155, - 1.10914, - 1.91444 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 520 - }, - { - "timestamp_utc": "2025-12-10T02:15:42.122753+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:56:10Z\",\n \"avg_ns\": 31258065704,\n \"stddev_ns\": 4267303221,\n \"avg_ts\": 4.354541,\n \"stddev_ts\": 1.215705,\n \"samples_ns\": [ 23984785938, 27049657712, 42739753463 ],\n \"samples_ts\": [ 5.33672, 4.73204, 2.99487 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:58:08Z\",\n \"avg_ns\": 350804773554,\n \"stddev_ns\": 4010224832,\n \"avg_ts\": 1.461657,\n \"stddev_ts\": 0.069682,\n \"samples_ns\": [ 361145038070, 359233249491, 332036033103 ],\n \"samples_ts\": [ 1.41771, 1.42526, 1.542 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T01:56:10Z", - "avg_ns": 31258065704, - "stddev_ns": 4267303221, - "avg_ts": 4.354541, - "stddev_ts": 1.215705, - "samples_ns": [ - 23984785938, - 27049657712, - 42739753463 - ], - "samples_ts": [ - 5.33672, - 4.73204, - 2.99487 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T01:58:08Z", - "avg_ns": 350804773554, - "stddev_ns": 4010224832, - "avg_ts": 1.461657, - "stddev_ts": 0.069682, - "samples_ns": [ - 361145038070, - 359233249491, - 332036033103 - ], - "samples_ts": [ - 1.41771, - 1.42526, - 1.542 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 521 - }, - { - "timestamp_utc": "2025-12-10T02:28:15.564176+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:15:43Z\",\n \"avg_ns\": 136504320980,\n \"stddev_ns\": 1634498289,\n \"avg_ts\": 3.974207,\n \"stddev_ts\": 1.222888,\n \"samples_ns\": [ 170125008196, 95708400007, 143679554738 ],\n \"samples_ts\": [ 3.00955, 5.34958, 3.56349 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:24:08Z\",\n \"avg_ns\": 81959549376,\n \"stddev_ns\": 3982727389,\n \"avg_ts\": 1.602260,\n \"stddev_ts\": 0.329900,\n \"samples_ns\": [ 91529139242, 64552675115, 89796833772 ],\n \"samples_ts\": [ 1.39846, 1.98288, 1.42544 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T02:15:43Z", - "avg_ns": 136504320980, - "stddev_ns": 1634498289, - "avg_ts": 3.974207, - "stddev_ts": 1.222888, - "samples_ns": [ - 170125008196, - 95708400007, - 143679554738 - ], - "samples_ts": [ - 3.00955, - 5.34958, - 3.56349 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T02:24:08Z", - "avg_ns": 81959549376, - "stddev_ns": 3982727389, - "avg_ts": 1.60226, - "stddev_ts": 0.3299, - "samples_ns": [ - 91529139242, - 64552675115, - 89796833772 - ], - "samples_ts": [ - 1.39846, - 1.98288, - 1.42544 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 522 - }, - { - "timestamp_utc": "2025-12-10T02:54:38.378869+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:28:17Z\",\n \"avg_ns\": 120203107970,\n \"stddev_ns\": 3928265922,\n \"avg_ts\": 4.558601,\n \"stddev_ts\": 1.307054,\n \"samples_ns\": [ 95504601837, 167848199345, 97256522729 ],\n \"samples_ts\": [ 5.361, 3.05038, 5.26443 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:36:42Z\",\n \"avg_ns\": 358406409104,\n \"stddev_ns\": 3990959541,\n \"avg_ts\": 1.430637,\n \"stddev_ts\": 0.067907,\n \"samples_ns\": [ 339286437852, 368013286642, 367919502820 ],\n \"samples_ts\": [ 1.50905, 1.39125, 1.39161 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T02:28:17Z", - "avg_ns": 120203107970, - "stddev_ns": 3928265922, - "avg_ts": 4.558601, - "stddev_ts": 1.307054, - "samples_ns": [ - 95504601837, - 167848199345, - 97256522729 - ], - "samples_ts": [ - 5.361, - 3.05038, - 5.26443 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T02:36:42Z", - "avg_ns": 358406409104, - "stddev_ns": 3990959541, - "avg_ts": 1.430637, - "stddev_ts": 0.067907, - "samples_ns": [ - 339286437852, - 368013286642, - 367919502820 - ], - "samples_ts": [ - 1.50905, - 1.39125, - 1.39161 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 523 - }, - { - "timestamp_utc": "2025-12-10T03:00:55.131070+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:54:40Z\",\n \"avg_ns\": 23844174717,\n \"stddev_ns\": 75514157,\n \"avg_ts\": 5.368223,\n \"stddev_ts\": 0.017022,\n \"samples_ns\": [ 23906458432, 23865880704, 23760185017 ],\n \"samples_ts\": [ 5.3542, 5.36331, 5.38716 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:56:28Z\",\n \"avg_ns\": 88653642230,\n \"stddev_ns\": 965267315,\n \"avg_ts\": 1.497458,\n \"stddev_ts\": 0.325597,\n \"samples_ns\": [ 74392512946, 113895314803, 77673098942 ],\n \"samples_ts\": [ 1.7206, 1.12384, 1.64793 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T02:54:40Z", - "avg_ns": 23844174717, - "stddev_ns": 75514157, - "avg_ts": 5.368223, - "stddev_ts": 0.017022, - "samples_ns": [ - 23906458432, - 23865880704, - 23760185017 - ], - "samples_ts": [ - 5.3542, - 5.36331, - 5.38716 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T02:56:28Z", - "avg_ns": 88653642230, - "stddev_ns": 965267315, - "avg_ts": 1.497458, - "stddev_ts": 0.325597, - "samples_ns": [ - 74392512946, - 113895314803, - 77673098942 - ], - "samples_ts": [ - 1.7206, - 1.12384, - 1.64793 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 524 - }, - { - "timestamp_utc": "2025-12-10T03:20:20.694897+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:00:56Z\",\n \"avg_ns\": 26796250638,\n \"stddev_ns\": 1244105354,\n \"avg_ts\": 4.895749,\n \"stddev_ts\": 0.884604,\n \"samples_ns\": [ 23626849941, 23724333266, 33037568709 ],\n \"samples_ts\": [ 5.41757, 5.3953, 3.87438 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:02:40Z\",\n \"avg_ns\": 353120207478,\n \"stddev_ns\": 1824229334,\n \"avg_ts\": 1.450101,\n \"stddev_ts\": 0.019308,\n \"samples_ns\": [ 355586571697, 356035801338, 347738249400 ],\n \"samples_ts\": [ 1.43987, 1.43806, 1.47237 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T03:00:56Z", - "avg_ns": 26796250638, - "stddev_ns": 1244105354, - "avg_ts": 4.895749, - "stddev_ts": 0.884604, - "samples_ns": [ - 23626849941, - 23724333266, - 33037568709 - ], - "samples_ts": [ - 5.41757, - 5.3953, - 3.87438 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T03:02:40Z", - "avg_ns": 353120207478, - "stddev_ns": 1824229334, - "avg_ts": 1.450101, - "stddev_ts": 0.019308, - "samples_ns": [ - 355586571697, - 356035801338, - 347738249400 - ], - "samples_ts": [ - 1.43987, - 1.43806, - 1.47237 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 525 - }, - { - "timestamp_utc": "2025-12-10T03:32:33.403787+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:20:21Z\",\n \"avg_ns\": 132004551425,\n \"stddev_ns\": 3845424544,\n \"avg_ts\": 3.943097,\n \"stddev_ts\": 0.611297,\n \"samples_ns\": [ 154364717200, 112819492374, 128829444702 ],\n \"samples_ts\": [ 3.31682, 4.53822, 3.97425 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:28:33Z\",\n \"avg_ns\": 79424760271,\n \"stddev_ns\": 3654033158,\n \"avg_ts\": 1.691560,\n \"stddev_ts\": 0.422713,\n \"samples_ns\": [ 105260411465, 63220083033, 69793786317 ],\n \"samples_ts\": [ 1.21603, 2.02467, 1.83397 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T03:20:21Z", - "avg_ns": 132004551425, - "stddev_ns": 3845424544, - "avg_ts": 3.943097, - "stddev_ts": 0.611297, - "samples_ns": [ - 154364717200, - 112819492374, - 128829444702 - ], - "samples_ts": [ - 3.31682, - 4.53822, - 3.97425 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T03:28:33Z", - "avg_ns": 79424760271, - "stddev_ns": 3654033158, - "avg_ts": 1.69156, - "stddev_ts": 0.422713, - "samples_ns": [ - 105260411465, - 63220083033, - 69793786317 - ], - "samples_ts": [ - 1.21603, - 2.02467, - 1.83397 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 526 - }, - { - "timestamp_utc": "2025-12-10T03:58:25.423292+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:32:35Z\",\n \"avg_ns\": 121275578053,\n \"stddev_ns\": 3506052197,\n \"avg_ts\": 4.377143,\n \"stddev_ts\": 0.986427,\n \"samples_ns\": [ 96320465272, 152886985928, 114619282961 ],\n \"samples_ts\": [ 5.31559, 3.34888, 4.46696 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:41:22Z\",\n \"avg_ns\": 340646272831,\n \"stddev_ns\": 3783124481,\n \"avg_ts\": 1.504323,\n \"stddev_ts\": 0.053672,\n \"samples_ns\": [ 331076452173, 336267313338, 354595052984 ],\n \"samples_ts\": [ 1.54647, 1.5226, 1.4439 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T03:32:35Z", - "avg_ns": 121275578053, - "stddev_ns": 3506052197, - "avg_ts": 4.377143, - "stddev_ts": 0.986427, - "samples_ns": [ - 96320465272, - 152886985928, - 114619282961 - ], - "samples_ts": [ - 5.31559, - 3.34888, - 4.46696 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T03:41:22Z", - "avg_ns": 340646272831, - "stddev_ns": 3783124481, - "avg_ts": 1.504323, - "stddev_ts": 0.053672, - "samples_ns": [ - 331076452173, - 336267313338, - 354595052984 - ], - "samples_ts": [ - 1.54647, - 1.5226, - 1.4439 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 527 - }, - { - "timestamp_utc": "2025-12-10T04:05:15.859721+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:58:27Z\",\n \"avg_ns\": 32544519193,\n \"stddev_ns\": 1868328341,\n \"avg_ts\": 4.154948,\n \"stddev_ts\": 1.180524,\n \"samples_ns\": [ 42308215146, 31536129633, 23789212801 ],\n \"samples_ts\": [ 3.02542, 4.05884, 5.38059 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:00:47Z\",\n \"avg_ns\": 89346369359,\n \"stddev_ns\": 4117750489,\n \"avg_ts\": 1.512129,\n \"stddev_ts\": 0.452462,\n \"samples_ns\": [ 63203989522, 95448570721, 109386547835 ],\n \"samples_ts\": [ 2.02519, 1.34104, 1.17016 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T03:58:27Z", - "avg_ns": 32544519193, - "stddev_ns": 1868328341, - "avg_ts": 4.154948, - "stddev_ts": 1.180524, - "samples_ns": [ - 42308215146, - 31536129633, - 23789212801 - ], - "samples_ts": [ - 3.02542, - 4.05884, - 5.38059 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T04:00:47Z", - "avg_ns": 89346369359, - "stddev_ns": 4117750489, - "avg_ts": 1.512129, - "stddev_ts": 0.452462, - "samples_ns": [ - 63203989522, - 95448570721, - 109386547835 - ], - "samples_ts": [ - 2.02519, - 1.34104, - 1.17016 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 528 - }, - { - "timestamp_utc": "2025-12-10T04:24:46.906370+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:05:16Z\",\n \"avg_ns\": 23865202003,\n \"stddev_ns\": 16860347,\n \"avg_ts\": 5.363459,\n \"stddev_ts\": 0.003789,\n \"samples_ns\": [ 23867584844, 23880742742, 23847278425 ],\n \"samples_ts\": [ 5.36292, 5.35997, 5.36749 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:06:52Z\",\n \"avg_ns\": 357922229397,\n \"stddev_ns\": 1907651210,\n \"avg_ts\": 1.431557,\n \"stddev_ts\": 0.048579,\n \"samples_ns\": [ 344186922968, 364085800723, 365493964501 ],\n \"samples_ts\": [ 1.48756, 1.40626, 1.40084 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T04:05:16Z", - "avg_ns": 23865202003, - "stddev_ns": 16860347, - "avg_ts": 5.363459, - "stddev_ts": 0.003789, - "samples_ns": [ - 23867584844, - 23880742742, - 23847278425 - ], - "samples_ts": [ - 5.36292, - 5.35997, - 5.36749 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T04:06:52Z", - "avg_ns": 357922229397, - "stddev_ns": 1907651210, - "avg_ts": 1.431557, - "stddev_ts": 0.048579, - "samples_ns": [ - 344186922968, - 364085800723, - 365493964501 - ], - "samples_ts": [ - 1.48756, - 1.40626, - 1.40084 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 529 - }, - { - "timestamp_utc": "2025-12-10T04:37:05.065118+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:24:48Z\",\n \"avg_ns\": 124905727755,\n \"stddev_ns\": 4085519829,\n \"avg_ts\": 4.136988,\n \"stddev_ts\": 0.500737,\n \"samples_ns\": [ 134779581818, 131284240064, 108653361383 ],\n \"samples_ts\": [ 3.79879, 3.89993, 4.71223 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:32:48Z\",\n \"avg_ns\": 85214608074,\n \"stddev_ns\": 3908439799,\n \"avg_ts\": 1.597674,\n \"stddev_ts\": 0.451205,\n \"samples_ns\": [ 116109696605, 75062573316, 64471554303 ],\n \"samples_ts\": [ 1.10241, 1.70524, 1.98537 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T04:24:48Z", - "avg_ns": 124905727755, - "stddev_ns": 4085519829, - "avg_ts": 4.136988, - "stddev_ts": 0.500737, - "samples_ns": [ - 134779581818, - 131284240064, - 108653361383 - ], - "samples_ts": [ - 3.79879, - 3.89993, - 4.71223 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T04:32:48Z", - "avg_ns": 85214608074, - "stddev_ns": 3908439799, - "avg_ts": 1.597674, - "stddev_ts": 0.451205, - "samples_ns": [ - 116109696605, - 75062573316, - 64471554303 - ], - "samples_ts": [ - 1.10241, - 1.70524, - 1.98537 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 530 - }, - { - "timestamp_utc": "2025-12-10T05:02:38.552967+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:37:06Z\",\n \"avg_ns\": 124031168738,\n \"stddev_ns\": 705627031,\n \"avg_ts\": 4.176712,\n \"stddev_ts\": 0.573801,\n \"samples_ns\": [ 105810833956, 133878563927, 132404108331 ],\n \"samples_ts\": [ 4.83882, 3.82436, 3.86695 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:45:58Z\",\n \"avg_ns\": 333039630673,\n \"stddev_ns\": 1071026171,\n \"avg_ts\": 1.537619,\n \"stddev_ts\": 0.024565,\n \"samples_ns\": [ 330454109205, 329453506130, 339211276686 ],\n \"samples_ts\": [ 1.54938, 1.55409, 1.50938 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T04:37:06Z", - "avg_ns": 124031168738, - "stddev_ns": 705627031, - "avg_ts": 4.176712, - "stddev_ts": 0.573801, - "samples_ns": [ - 105810833956, - 133878563927, - 132404108331 - ], - "samples_ts": [ - 4.83882, - 3.82436, - 3.86695 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T04:45:58Z", - "avg_ns": 333039630673, - "stddev_ns": 1071026171, - "avg_ts": 1.537619, - "stddev_ts": 0.024565, - "samples_ns": [ - 330454109205, - 329453506130, - 339211276686 - ], - "samples_ts": [ - 1.54938, - 1.55409, - 1.50938 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 531 - }, - { - "timestamp_utc": "2025-12-10T05:09:30.430996+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:02:40Z\",\n \"avg_ns\": 39341939642,\n \"stddev_ns\": 3907219139,\n \"avg_ts\": 3.290950,\n \"stddev_ts\": 0.446289,\n \"samples_ns\": [ 42328298549, 42067880804, 33629639574 ],\n \"samples_ts\": [ 3.02398, 3.0427, 3.80617 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:05:20Z\",\n \"avg_ns\": 82953524588,\n \"stddev_ns\": 2617787468,\n \"avg_ts\": 1.651802,\n \"stddev_ts\": 0.480323,\n \"samples_ns\": [ 63247899940, 70249096961, 115363576864 ],\n \"samples_ts\": [ 2.02378, 1.82209, 1.10954 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T05:02:40Z", - "avg_ns": 39341939642, - "stddev_ns": 3907219139, - "avg_ts": 3.29095, - "stddev_ts": 0.446289, - "samples_ns": [ - 42328298549, - 42067880804, - 33629639574 - ], - "samples_ts": [ - 3.02398, - 3.0427, - 3.80617 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T05:05:20Z", - "avg_ns": 82953524588, - "stddev_ns": 2617787468, - "avg_ts": 1.651802, - "stddev_ts": 0.480323, - "samples_ns": [ - 63247899940, - 70249096961, - 115363576864 - ], - "samples_ts": [ - 2.02378, - 1.82209, - 1.10954 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 532 - }, - { - "timestamp_utc": "2025-12-10T05:28:56.719670+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:09:32Z\",\n \"avg_ns\": 23898682357,\n \"stddev_ns\": 43351541,\n \"avg_ts\": 5.355956,\n \"stddev_ts\": 0.009725,\n \"samples_ns\": [ 23925929416, 23921425114, 23848692543 ],\n \"samples_ts\": [ 5.34984, 5.35085, 5.36717 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:11:24Z\",\n \"avg_ns\": 350540341358,\n \"stddev_ns\": 1432407006,\n \"avg_ts\": 1.462232,\n \"stddev_ts\": 0.059864,\n \"samples_ns\": [ 336041842174, 350911283331, 364667898569 ],\n \"samples_ts\": [ 1.52362, 1.45906, 1.40402 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T05:09:32Z", - "avg_ns": 23898682357, - "stddev_ns": 43351541, - "avg_ts": 5.355956, - "stddev_ts": 0.009725, - "samples_ns": [ - 23925929416, - 23921425114, - 23848692543 - ], - "samples_ts": [ - 5.34984, - 5.35085, - 5.36717 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T05:11:24Z", - "avg_ns": 350540341358, - "stddev_ns": 1432407006, - "avg_ts": 1.462232, - "stddev_ts": 0.059864, - "samples_ns": [ - 336041842174, - 350911283331, - 364667898569 - ], - "samples_ts": [ - 1.52362, - 1.45906, - 1.40402 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 533 - }, - { - "timestamp_utc": "2025-12-10T05:41:36.982508+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:28:58Z\",\n \"avg_ns\": 120452968086,\n \"stddev_ns\": 1518056122,\n \"avg_ts\": 4.417620,\n \"stddev_ts\": 1.019976,\n \"samples_ns\": [ 112482247091, 153422788469, 95453868700 ],\n \"samples_ts\": [ 4.55183, 3.33718, 5.36385 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:37:06Z\",\n \"avg_ns\": 89902617973,\n \"stddev_ns\": 1500113072,\n \"avg_ts\": 1.494104,\n \"stddev_ts\": 0.425249,\n \"samples_ns\": [ 107102066487, 97989614704, 64616172728 ],\n \"samples_ts\": [ 1.19512, 1.30626, 1.98093 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T05:28:58Z", - "avg_ns": 120452968086, - "stddev_ns": 1518056122, - "avg_ts": 4.41762, - "stddev_ts": 1.019976, - "samples_ns": [ - 112482247091, - 153422788469, - 95453868700 - ], - "samples_ts": [ - 4.55183, - 3.33718, - 5.36385 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T05:37:06Z", - "avg_ns": 89902617973, - "stddev_ns": 1500113072, - "avg_ts": 1.494104, - "stddev_ts": 0.425249, - "samples_ns": [ - 107102066487, - 97989614704, - 64616172728 - ], - "samples_ts": [ - 1.19512, - 1.30626, - 1.98093 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 534 - }, - { - "timestamp_utc": "2025-12-10T06:07:30.442684+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:41:37Z\",\n \"avg_ns\": 131366854821,\n \"stddev_ns\": 3954535757,\n \"avg_ts\": 3.962773,\n \"stddev_ts\": 0.615722,\n \"samples_ns\": [ 127878077313, 112355887879, 153866599273 ],\n \"samples_ts\": [ 4.00381, 4.55695, 3.32756 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:50:30Z\",\n \"avg_ns\": 339657362093,\n \"stddev_ns\": 4124570565,\n \"avg_ts\": 1.507549,\n \"stddev_ts\": 0.018183,\n \"samples_ns\": [ 336970030572, 337595771823, 344406283886 ],\n \"samples_ts\": [ 1.51942, 1.51661, 1.48662 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T05:41:37Z", - "avg_ns": 131366854821, - "stddev_ns": 3954535757, - "avg_ts": 3.962773, - "stddev_ts": 0.615722, - "samples_ns": [ - 127878077313, - 112355887879, - 153866599273 - ], - "samples_ts": [ - 4.00381, - 4.55695, - 3.32756 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T05:50:30Z", - "avg_ns": 339657362093, - "stddev_ns": 4124570565, - "avg_ts": 1.507549, - "stddev_ts": 0.018183, - "samples_ns": [ - 336970030572, - 337595771823, - 344406283886 - ], - "samples_ts": [ - 1.51942, - 1.51661, - 1.48662 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 535 - }, - { - "timestamp_utc": "2025-12-10T06:14:24.869716+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:07:32Z\",\n \"avg_ns\": 40686618733,\n \"stddev_ns\": 1042078098,\n \"avg_ts\": 3.159742,\n \"stddev_ts\": 0.261251,\n \"samples_ns\": [ 42573487245, 42507075600, 36979293356 ],\n \"samples_ts\": [ 3.00657, 3.01126, 3.4614 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:10:16Z\",\n \"avg_ns\": 82439383310,\n \"stddev_ns\": 4272480171,\n \"avg_ts\": 1.661391,\n \"stddev_ts\": 0.478306,\n \"samples_ns\": [ 63946892124, 68522335415, 114848922391 ],\n \"samples_ts\": [ 2.00166, 1.868, 1.11451 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T06:07:32Z", - "avg_ns": 40686618733, - "stddev_ns": 1042078098, - "avg_ts": 3.159742, - "stddev_ts": 0.261251, - "samples_ns": [ - 42573487245, - 42507075600, - 36979293356 - ], - "samples_ts": [ - 3.00657, - 3.01126, - 3.4614 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T06:10:16Z", - "avg_ns": 82439383310, - "stddev_ns": 4272480171, - "avg_ts": 1.661391, - "stddev_ts": 0.478306, - "samples_ns": [ - 63946892124, - 68522335415, - 114848922391 - ], - "samples_ts": [ - 2.00166, - 1.868, - 1.11451 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 536 - }, - { - "timestamp_utc": "2025-12-10T06:33:57.475252+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:14:26Z\",\n \"avg_ns\": 23971950023,\n \"stddev_ns\": 75897827,\n \"avg_ts\": 5.339610,\n \"stddev_ts\": 0.016888,\n \"samples_ns\": [ 24055322415, 23906871190, 23953656466 ],\n \"samples_ts\": [ 5.32107, 5.35411, 5.34365 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:16:21Z\",\n \"avg_ns\": 351791777370,\n \"stddev_ns\": 1169721955,\n \"avg_ts\": 1.457010,\n \"stddev_ts\": 0.059228,\n \"samples_ns\": [ 337532944870, 351724200822, 366118186420 ],\n \"samples_ts\": [ 1.51689, 1.45569, 1.39846 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T06:14:26Z", - "avg_ns": 23971950023, - "stddev_ns": 75897827, - "avg_ts": 5.33961, - "stddev_ts": 0.016888, - "samples_ns": [ - 24055322415, - 23906871190, - 23953656466 - ], - "samples_ts": [ - 5.32107, - 5.35411, - 5.34365 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T06:16:21Z", - "avg_ns": 351791777370, - "stddev_ns": 1169721955, - "avg_ts": 1.45701, - "stddev_ts": 0.059228, - "samples_ns": [ - 337532944870, - 351724200822, - 366118186420 - ], - "samples_ts": [ - 1.51689, - 1.45569, - 1.39846 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 537 - }, - { - "timestamp_utc": "2025-12-10T06:46:46.336014+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:33:59Z\",\n \"avg_ns\": 122674889102,\n \"stddev_ns\": 3891015660,\n \"avg_ts\": 4.307246,\n \"stddev_ts\": 0.932020,\n \"samples_ns\": [ 120625275480, 150227836037, 97171555791 ],\n \"samples_ts\": [ 4.24455, 3.40816, 5.26903 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:42:15Z\",\n \"avg_ns\": 90071053466,\n \"stddev_ns\": 1178651748,\n \"avg_ts\": 1.499924,\n \"stddev_ts\": 0.436188,\n \"samples_ns\": [ 114086937700, 91388266232, 64737956467 ],\n \"samples_ts\": [ 1.12195, 1.40062, 1.9772 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T06:33:59Z", - "avg_ns": 122674889102, - "stddev_ns": 3891015660, - "avg_ts": 4.307246, - "stddev_ts": 0.93202, - "samples_ns": [ - 120625275480, - 150227836037, - 97171555791 - ], - "samples_ts": [ - 4.24455, - 3.40816, - 5.26903 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T06:42:15Z", - "avg_ns": 90071053466, - "stddev_ns": 1178651748, - "avg_ts": 1.499924, - "stddev_ts": 0.436188, - "samples_ns": [ - 114086937700, - 91388266232, - 64737956467 - ], - "samples_ts": [ - 1.12195, - 1.40062, - 1.9772 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 538 - }, - { - "timestamp_utc": "2025-12-10T07:12:41.320977+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:46:47Z\",\n \"avg_ns\": 129175250753,\n \"stddev_ns\": 4248870805,\n \"avg_ts\": 3.999399,\n \"stddev_ts\": 0.448217,\n \"samples_ns\": [ 120709069528, 119782752811, 147033929921 ],\n \"samples_ts\": [ 4.2416, 4.27441, 3.48219 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:55:40Z\",\n \"avg_ns\": 339948806117,\n \"stddev_ns\": 1560117533,\n \"avg_ts\": 1.506368,\n \"stddev_ts\": 0.024088,\n \"samples_ns\": [ 336796451312, 336765668735, 346284298304 ],\n \"samples_ts\": [ 1.52021, 1.52034, 1.47855 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T06:46:47Z", - "avg_ns": 129175250753, - "stddev_ns": 4248870805, - "avg_ts": 3.999399, - "stddev_ts": 0.448217, - "samples_ns": [ - 120709069528, - 119782752811, - 147033929921 - ], - "samples_ts": [ - 4.2416, - 4.27441, - 3.48219 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T06:55:40Z", - "avg_ns": 339948806117, - "stddev_ns": 1560117533, - "avg_ts": 1.506368, - "stddev_ts": 0.024088, - "samples_ns": [ - 336796451312, - 336765668735, - 346284298304 - ], - "samples_ts": [ - 1.52021, - 1.52034, - 1.47855 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 539 - }, - { - "timestamp_utc": "2025-12-10T07:19:05.143400+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:12:43Z\",\n \"avg_ns\": 36988853765,\n \"stddev_ns\": 3879791179,\n \"avg_ts\": 3.623834,\n \"stddev_ts\": 1.015625,\n \"samples_ns\": [ 42151626978, 42129235958, 26685698360 ],\n \"samples_ts\": [ 3.03666, 3.03827, 4.79658 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:15:16Z\",\n \"avg_ns\": 76144409386,\n \"stddev_ns\": 3343188169,\n \"avg_ts\": 2.268538,\n \"stddev_ts\": 1.408952,\n \"samples_ns\": [ 34022218830, 61537097825, 132873911504 ],\n \"samples_ts\": [ 3.76225, 2.08005, 0.963319 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T07:12:43Z", - "avg_ns": 36988853765, - "stddev_ns": 3879791179, - "avg_ts": 3.623834, - "stddev_ts": 1.015625, - "samples_ns": [ - 42151626978, - 42129235958, - 26685698360 - ], - "samples_ts": [ - 3.03666, - 3.03827, - 4.79658 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T07:15:16Z", - "avg_ns": 76144409386, - "stddev_ns": 3343188169, - "avg_ts": 2.268538, - "stddev_ts": 1.408952, - "samples_ns": [ - 34022218830, - 61537097825, - 132873911504 - ], - "samples_ts": [ - 3.76225, - 2.08005, - 0.963319 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 540 - }, - { - "timestamp_utc": "2025-12-10T07:36:22.129779+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:19:06Z\",\n \"avg_ns\": 17138282125,\n \"stddev_ns\": 4269000930,\n \"avg_ts\": 8.580173,\n \"stddev_ts\": 3.331518,\n \"samples_ns\": [ 27042076341, 12213702836, 12159067198 ],\n \"samples_ts\": [ 4.73336, 10.48, 10.5271 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:20:40Z\",\n \"avg_ns\": 313710901778,\n \"stddev_ns\": 2646946349,\n \"avg_ts\": 1.632153,\n \"stddev_ts\": 0.013705,\n \"samples_ns\": [ 312139673132, 312226107602, 316766924602 ],\n \"samples_ts\": [ 1.64029, 1.63984, 1.61633 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T07:19:06Z", - "avg_ns": 17138282125, - "stddev_ns": 4269000930, - "avg_ts": 8.580173, - "stddev_ts": 3.331518, - "samples_ns": [ - 27042076341, - 12213702836, - 12159067198 - ], - "samples_ts": [ - 4.73336, - 10.48, - 10.5271 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T07:20:40Z", - "avg_ns": 313710901778, - "stddev_ns": 2646946349, - "avg_ts": 1.632153, - "stddev_ts": 0.013705, - "samples_ns": [ - 312139673132, - 312226107602, - 316766924602 - ], - "samples_ts": [ - 1.64029, - 1.63984, - 1.61633 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 541 - }, - { - "timestamp_utc": "2025-12-10T07:48:29.806162+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:36:23Z\",\n \"avg_ns\": 115227117915,\n \"stddev_ns\": 120838052,\n \"avg_ts\": 4.873556,\n \"stddev_ts\": 1.597047,\n \"samples_ns\": [ 88892384862, 87806698020, 168982270864 ],\n \"samples_ts\": [ 5.75977, 5.83099, 3.0299 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:44:59Z\",\n \"avg_ns\": 69800315813,\n \"stddev_ns\": 2255928709,\n \"avg_ts\": 2.508003,\n \"stddev_ts\": 1.337981,\n \"samples_ns\": [ 41415423946, 37038339711, 130947183783 ],\n \"samples_ts\": [ 3.09064, 3.45588, 0.977493 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T07:36:23Z", - "avg_ns": 115227117915, - "stddev_ns": 120838052, - "avg_ts": 4.873556, - "stddev_ts": 1.597047, - "samples_ns": [ - 88892384862, - 87806698020, - 168982270864 - ], - "samples_ts": [ - 5.75977, - 5.83099, - 3.0299 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T07:44:59Z", - "avg_ns": 69800315813, - "stddev_ns": 2255928709, - "avg_ts": 2.508003, - "stddev_ts": 1.337981, - "samples_ns": [ - 41415423946, - 37038339711, - 130947183783 - ], - "samples_ts": [ - 3.09064, - 3.45588, - 0.977493 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 542 - }, - { - "timestamp_utc": "2025-12-10T08:11:40.835578+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:48:31Z\",\n \"avg_ns\": 104392468396,\n \"stddev_ns\": 2478021510,\n \"avg_ts\": 5.910327,\n \"stddev_ts\": 2.857340,\n \"samples_ns\": [ 58489605129, 168464571833, 86223228226 ],\n \"samples_ts\": [ 8.75369, 3.03921, 5.93808 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:55:43Z\",\n \"avg_ns\": 318910421720,\n \"stddev_ns\": 2286255743,\n \"avg_ts\": 1.606664,\n \"stddev_ts\": 0.053202,\n \"samples_ns\": [ 312616674111, 312772954682, 331341636369 ],\n \"samples_ts\": [ 1.63779, 1.63697, 1.54523 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T07:48:31Z", - "avg_ns": 104392468396, - "stddev_ns": 2478021510, - "avg_ts": 5.910327, - "stddev_ts": 2.85734, - "samples_ns": [ - 58489605129, - 168464571833, - 86223228226 - ], - "samples_ts": [ - 8.75369, - 3.03921, - 5.93808 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T07:55:43Z", - "avg_ns": 318910421720, - "stddev_ns": 2286255743, - "avg_ts": 1.606664, - "stddev_ts": 0.053202, - "samples_ns": [ - 312616674111, - 312772954682, - 331341636369 - ], - "samples_ts": [ - 1.63779, - 1.63697, - 1.54523 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 543 - }, - { - "timestamp_utc": "2025-12-10T08:18:01.362985+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:11:42Z\",\n \"avg_ns\": 42075013817,\n \"stddev_ns\": 26930277,\n \"avg_ts\": 3.042186,\n \"stddev_ts\": 0.001947,\n \"samples_ns\": [ 42105042369, 42066993277, 42053005806 ],\n \"samples_ts\": [ 3.04002, 3.04277, 3.04378 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:14:31Z\",\n \"avg_ns\": 69632816879,\n \"stddev_ns\": 4149481985,\n \"avg_ts\": 2.320977,\n \"stddev_ts\": 1.336335,\n \"samples_ns\": [ 62167873179, 33961979468, 112768597990 ],\n \"samples_ts\": [ 2.05894, 3.76892, 1.13507 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T08:11:42Z", - "avg_ns": 42075013817, - "stddev_ns": 26930277, - "avg_ts": 3.042186, - "stddev_ts": 0.001947, - "samples_ns": [ - 42105042369, - 42066993277, - 42053005806 - ], - "samples_ts": [ - 3.04002, - 3.04277, - 3.04378 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T08:14:31Z", - "avg_ns": 69632816879, - "stddev_ns": 4149481985, - "avg_ts": 2.320977, - "stddev_ts": 1.336335, - "samples_ns": [ - 62167873179, - 33961979468, - 112768597990 - ], - "samples_ts": [ - 2.05894, - 3.76892, - 1.13507 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 544 - }, - { - "timestamp_utc": "2025-12-10T08:35:57.364522+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:18:03Z\",\n \"avg_ns\": 32166570258,\n \"stddev_ns\": 1923870133,\n \"avg_ts\": 5.364996,\n \"stddev_ts\": 3.992852,\n \"samples_ns\": [ 41708865764, 41959449583, 12831395429 ],\n \"samples_ts\": [ 3.06889, 3.05056, 9.97553 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:20:21Z\",\n \"avg_ns\": 311719246428,\n \"stddev_ns\": 97121137,\n \"avg_ts\": 1.642504,\n \"stddev_ts\": 0.000512,\n \"samples_ns\": [ 311831117065, 311656534370, 311670087850 ],\n \"samples_ts\": [ 1.64191, 1.64283, 1.64276 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T08:18:03Z", - "avg_ns": 32166570258, - "stddev_ns": 1923870133, - "avg_ts": 5.364996, - "stddev_ts": 3.992852, - "samples_ns": [ - 41708865764, - 41959449583, - 12831395429 - ], - "samples_ts": [ - 3.06889, - 3.05056, - 9.97553 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T08:20:21Z", - "avg_ns": 311719246428, - "stddev_ns": 97121137, - "avg_ts": 1.642504, - "stddev_ts": 0.000512, - "samples_ns": [ - 311831117065, - 311656534370, - 311670087850 - ], - "samples_ts": [ - 1.64191, - 1.64283, - 1.64276 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 545 - }, - { - "timestamp_utc": "2025-12-10T08:47:46.395362+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:35:58Z\",\n \"avg_ns\": 123585610856,\n \"stddev_ns\": 4278138532,\n \"avg_ts\": 5.117701,\n \"stddev_ts\": 3.168916,\n \"samples_ns\": [ 145938910216, 58391861587, 166426060766 ],\n \"samples_ts\": [ 3.50832, 8.76835, 3.07644 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:44:15Z\",\n \"avg_ns\": 69859567999,\n \"stddev_ns\": 3960628979,\n \"avg_ts\": 2.250974,\n \"stddev_ts\": 1.328001,\n \"samples_ns\": [ 73706774079, 34049625993, 101822303925 ],\n \"samples_ts\": [ 1.73661, 3.75922, 1.25709 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T08:35:58Z", - "avg_ns": 123585610856, - "stddev_ns": 4278138532, - "avg_ts": 5.117701, - "stddev_ts": 3.168916, - "samples_ns": [ - 145938910216, - 58391861587, - 166426060766 - ], - "samples_ts": [ - 3.50832, - 8.76835, - 3.07644 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T08:44:15Z", - "avg_ns": 69859567999, - "stddev_ns": 3960628979, - "avg_ts": 2.250974, - "stddev_ts": 1.328001, - "samples_ns": [ - 73706774079, - 34049625993, - 101822303925 - ], - "samples_ts": [ - 1.73661, - 3.75922, - 1.25709 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 546 - }, - { - "timestamp_utc": "2025-12-10T09:11:02.159587+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:47:48Z\",\n \"avg_ns\": 104083348198,\n \"stddev_ns\": 3871761299,\n \"avg_ts\": 6.120569,\n \"stddev_ts\": 3.829668,\n \"samples_ns\": [ 48660344791, 144260889578, 119328810226 ],\n \"samples_ts\": [ 10.5219, 3.54913, 4.29067 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:55:29Z\",\n \"avg_ns\": 310695042665,\n \"stddev_ns\": 3705977971,\n \"avg_ts\": 1.647970,\n \"stddev_ts\": 0.011310,\n \"samples_ns\": [ 311903982823, 311938476221, 308242668953 ],\n \"samples_ts\": [ 1.64153, 1.64135, 1.66103 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T08:47:48Z", - "avg_ns": 104083348198, - "stddev_ns": 3871761299, - "avg_ts": 6.120569, - "stddev_ts": 3.829668, - "samples_ns": [ - 48660344791, - 144260889578, - 119328810226 - ], - "samples_ts": [ - 10.5219, - 3.54913, - 4.29067 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T08:55:29Z", - "avg_ns": 310695042665, - "stddev_ns": 3705977971, - "avg_ts": 1.64797, - "stddev_ts": 0.01131, - "samples_ns": [ - 311903982823, - 311938476221, - 308242668953 - ], - "samples_ts": [ - 1.64153, - 1.64135, - 1.66103 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 547 - }, - { - "timestamp_utc": "2025-12-10T09:17:04.044071+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:11:03Z\",\n \"avg_ns\": 41826457672,\n \"stddev_ns\": 45542451,\n \"avg_ts\": 3.060266,\n \"stddev_ts\": 0.003334,\n \"samples_ns\": [ 41775770796, 41863932129, 41839670093 ],\n \"samples_ts\": [ 3.06398, 3.05752, 3.0593 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:13:33Z\",\n \"avg_ns\": 69844136181,\n \"stddev_ns\": 4145708751,\n \"avg_ts\": 2.252154,\n \"stddev_ts\": 1.332931,\n \"samples_ns\": [ 101440355210, 33973137764, 74118915569 ],\n \"samples_ts\": [ 1.26183, 3.76768, 1.72695 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T09:11:03Z", - "avg_ns": 41826457672, - "stddev_ns": 45542451, - "avg_ts": 3.060266, - "stddev_ts": 0.003334, - "samples_ns": [ - 41775770796, - 41863932129, - 41839670093 - ], - "samples_ts": [ - 3.06398, - 3.05752, - 3.0593 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T09:13:33Z", - "avg_ns": 69844136181, - "stddev_ns": 4145708751, - "avg_ts": 2.252154, - "stddev_ts": 1.332931, - "samples_ns": [ - 101440355210, - 33973137764, - 74118915569 - ], - "samples_ts": [ - 1.26183, - 3.76768, - 1.72695 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 548 - }, - { - "timestamp_utc": "2025-12-10T09:35:39.270953+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:17:05Z\",\n \"avg_ns\": 42013703862,\n \"stddev_ns\": 63471857,\n \"avg_ts\": 3.046630,\n \"stddev_ts\": 0.004603,\n \"samples_ns\": [ 42076781184, 41949844692, 42014485710 ],\n \"samples_ts\": [ 3.04206, 3.05126, 3.04657 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:19:54Z\",\n \"avg_ns\": 314661939982,\n \"stddev_ns\": 4133785063,\n \"avg_ts\": 1.627329,\n \"stddev_ts\": 0.021219,\n \"samples_ns\": [ 319430752750, 312456410402, 312098656796 ],\n \"samples_ts\": [ 1.60285, 1.63863, 1.64051 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T09:17:05Z", - "avg_ns": 42013703862, - "stddev_ns": 63471857, - "avg_ts": 3.04663, - "stddev_ts": 0.004603, - "samples_ns": [ - 42076781184, - 41949844692, - 42014485710 - ], - "samples_ts": [ - 3.04206, - 3.05126, - 3.04657 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T09:19:54Z", - "avg_ns": 314661939982, - "stddev_ns": 4133785063, - "avg_ts": 1.627329, - "stddev_ts": 0.021219, - "samples_ns": [ - 319430752750, - 312456410402, - 312098656796 - ], - "samples_ts": [ - 1.60285, - 1.63863, - 1.64051 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 549 - }, - { - "timestamp_utc": "2025-12-10T09:46:29.482327+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:35:40Z\",\n \"avg_ns\": 114638464568,\n \"stddev_ns\": 2416383815,\n \"avg_ts\": 5.715659,\n \"stddev_ts\": 3.753549,\n \"samples_ns\": [ 166938908553, 51143171288, 125833313865 ],\n \"samples_ts\": [ 3.06699, 10.0111, 4.06887 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:42:59Z\",\n \"avg_ns\": 69665578939,\n \"stddev_ns\": 1613867823,\n \"avg_ts\": 2.479386,\n \"stddev_ts\": 1.388150,\n \"samples_ns\": [ 126959333986, 33987983974, 48049418859 ],\n \"samples_ts\": [ 1.0082, 3.76604, 2.66392 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T09:35:40Z", - "avg_ns": 114638464568, - "stddev_ns": 2416383815, - "avg_ts": 5.715659, - "stddev_ts": 3.753549, - "samples_ns": [ - 166938908553, - 51143171288, - 125833313865 - ], - "samples_ts": [ - 3.06699, - 10.0111, - 4.06887 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T09:42:59Z", - "avg_ns": 69665578939, - "stddev_ns": 1613867823, - "avg_ts": 2.479386, - "stddev_ts": 1.38815, - "samples_ns": [ - 126959333986, - 33987983974, - 48049418859 - ], - "samples_ts": [ - 1.0082, - 3.76604, - 2.66392 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 550 - }, - { - "timestamp_utc": "2025-12-10T10:10:40.289614+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:46:31Z\",\n \"avg_ns\": 114366650448,\n \"stddev_ns\": 4269850228,\n \"avg_ts\": 4.914410,\n \"stddev_ts\": 1.647477,\n \"samples_ns\": [ 82109951003, 94130969772, 166859030570 ],\n \"samples_ts\": [ 6.23554, 5.43923, 3.06846 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:55:01Z\",\n \"avg_ns\": 312556746525,\n \"stddev_ns\": 1198303510,\n \"avg_ts\": 1.638119,\n \"stddev_ts\": 0.006267,\n \"samples_ns\": [ 313938443336, 311930055128, 311801741113 ],\n \"samples_ts\": [ 1.63089, 1.64139, 1.64207 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T09:46:31Z", - "avg_ns": 114366650448, - "stddev_ns": 4269850228, - "avg_ts": 4.91441, - "stddev_ts": 1.647477, - "samples_ns": [ - 82109951003, - 94130969772, - 166859030570 - ], - "samples_ts": [ - 6.23554, - 5.43923, - 3.06846 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T09:55:01Z", - "avg_ns": 312556746525, - "stddev_ns": 1198303510, - "avg_ts": 1.638119, - "stddev_ts": 0.006267, - "samples_ns": [ - 313938443336, - 311930055128, - 311801741113 - ], - "samples_ts": [ - 1.63089, - 1.64139, - 1.64207 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 551 - }, - { - "timestamp_utc": "2025-12-10T10:16:03.977762+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:10:41Z\",\n \"avg_ns\": 28348117258,\n \"stddev_ns\": 3801870360,\n \"avg_ts\": 5.867990,\n \"stddev_ts\": 3.945503,\n \"samples_ns\": [ 12338705717, 30490585259, 42215060798 ],\n \"samples_ts\": [ 10.3739, 4.19802, 3.03209 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:12:18Z\",\n \"avg_ns\": 74734206067,\n \"stddev_ns\": 4184899326,\n \"avg_ts\": 2.313206,\n \"stddev_ts\": 1.424631,\n \"samples_ns\": [ 131140549199, 59480321484, 33581747518 ],\n \"samples_ts\": [ 0.976052, 2.15197, 3.81159 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T10:10:41Z", - "avg_ns": 28348117258, - "stddev_ns": 3801870360, - "avg_ts": 5.86799, - "stddev_ts": 3.945503, - "samples_ns": [ - 12338705717, - 30490585259, - 42215060798 - ], - "samples_ts": [ - 10.3739, - 4.19802, - 3.03209 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T10:12:18Z", - "avg_ns": 74734206067, - "stddev_ns": 4184899326, - "avg_ts": 2.313206, - "stddev_ts": 1.424631, - "samples_ns": [ - 131140549199, - 59480321484, - 33581747518 - ], - "samples_ts": [ - 0.976052, - 2.15197, - 3.81159 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 552 - }, - { - "timestamp_utc": "2025-12-10T10:35:21.122986+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:16:04Z\",\n \"avg_ns\": 42245737164,\n \"stddev_ns\": 175007492,\n \"avg_ts\": 3.029926,\n \"stddev_ts\": 0.012581,\n \"samples_ns\": [ 42359790429, 42333180148, 42044240916 ],\n \"samples_ts\": [ 3.02173, 3.02363, 3.04441 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:18:40Z\",\n \"avg_ns\": 333151736012,\n \"stddev_ns\": 2321629369,\n \"avg_ts\": 1.540042,\n \"stddev_ts\": 0.087185,\n \"samples_ns\": [ 347408620263, 339624297325, 312422290449 ],\n \"samples_ts\": [ 1.47377, 1.50755, 1.63881 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T10:16:04Z", - "avg_ns": 42245737164, - "stddev_ns": 175007492, - "avg_ts": 3.029926, - "stddev_ts": 0.012581, - "samples_ns": [ - 42359790429, - 42333180148, - 42044240916 - ], - "samples_ts": [ - 3.02173, - 3.02363, - 3.04441 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T10:18:40Z", - "avg_ns": 333151736012, - "stddev_ns": 2321629369, - "avg_ts": 1.540042, - "stddev_ts": 0.087185, - "samples_ns": [ - 347408620263, - 339624297325, - 312422290449 - ], - "samples_ts": [ - 1.47377, - 1.50755, - 1.63881 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 553 - }, - { - "timestamp_utc": "2025-12-10T10:45:59.850464+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:35:22Z\",\n \"avg_ns\": 114700545549,\n \"stddev_ns\": 4205011862,\n \"avg_ts\": 4.901090,\n \"stddev_ts\": 1.644093,\n \"samples_ns\": [ 167423617689, 94323923605, 82354095355 ],\n \"samples_ts\": [ 3.05811, 5.4281, 6.21706 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:41:57Z\",\n \"avg_ns\": 80286501311,\n \"stddev_ns\": 1567168385,\n \"avg_ts\": 2.150186,\n \"stddev_ts\": 1.443376,\n \"samples_ns\": [ 132772095927, 74019767652, 34067640355 ],\n \"samples_ts\": [ 0.964058, 1.72927, 3.75723 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T10:35:22Z", - "avg_ns": 114700545549, - "stddev_ns": 4205011862, - "avg_ts": 4.90109, - "stddev_ts": 1.644093, - "samples_ns": [ - 167423617689, - 94323923605, - 82354095355 - ], - "samples_ts": [ - 3.05811, - 5.4281, - 6.21706 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T10:41:57Z", - "avg_ns": 80286501311, - "stddev_ns": 1567168385, - "avg_ts": 2.150186, - "stddev_ts": 1.443376, - "samples_ns": [ - 132772095927, - 74019767652, - 34067640355 - ], - "samples_ts": [ - 0.964058, - 1.72927, - 3.75723 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 554 - }, - { - "timestamp_utc": "2025-12-10T11:10:30.222646+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:46:00Z\",\n \"avg_ns\": 115532182895,\n \"stddev_ns\": 1465456826,\n \"avg_ts\": 5.362748,\n \"stddev_ts\": 3.008657,\n \"samples_ns\": [ 118948303127, 58463685247, 169184560312 ],\n \"samples_ts\": [ 4.30439, 8.75757, 3.02628 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:54:13Z\",\n \"avg_ns\": 325255961114,\n \"stddev_ns\": 4167973753,\n \"avg_ts\": 1.577770,\n \"stddev_ts\": 0.091081,\n \"samples_ns\": [ 347661956153, 314802666248, 313303260942 ],\n \"samples_ts\": [ 1.47269, 1.62642, 1.6342 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T10:46:00Z", - "avg_ns": 115532182895, - "stddev_ns": 1465456826, - "avg_ts": 5.362748, - "stddev_ts": 3.008657, - "samples_ns": [ - 118948303127, - 58463685247, - 169184560312 - ], - "samples_ts": [ - 4.30439, - 8.75757, - 3.02628 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T10:54:13Z", - "avg_ns": 325255961114, - "stddev_ns": 4167973753, - "avg_ts": 1.57777, - "stddev_ts": 0.091081, - "samples_ns": [ - 347661956153, - 314802666248, - 313303260942 - ], - "samples_ts": [ - 1.47269, - 1.62642, - 1.6342 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 555 - }, - { - "timestamp_utc": "2025-12-10T11:15:54.471187+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:10:31Z\",\n \"avg_ns\": 19922996524,\n \"stddev_ns\": 4046033078,\n \"avg_ts\": 8.043971,\n \"stddev_ts\": 3.784352,\n \"samples_ns\": [ 12156994501, 12910528491, 34701466580 ],\n \"samples_ts\": [ 10.5289, 9.91439, 3.6886 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:11:43Z\",\n \"avg_ns\": 83356875210,\n \"stddev_ns\": 1516127477,\n \"avg_ts\": 2.098632,\n \"stddev_ts\": 1.517637,\n \"samples_ns\": [ 130782875283, 85831394029, 33456356318 ],\n \"samples_ts\": [ 0.978721, 1.4913, 3.82588 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T11:10:31Z", - "avg_ns": 19922996524, - "stddev_ns": 4046033078, - "avg_ts": 8.043971, - "stddev_ts": 3.784352, - "samples_ns": [ - 12156994501, - 12910528491, - 34701466580 - ], - "samples_ts": [ - 10.5289, - 9.91439, - 3.6886 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T11:11:43Z", - "avg_ns": 83356875210, - "stddev_ns": 1516127477, - "avg_ts": 2.098632, - "stddev_ts": 1.517637, - "samples_ns": [ - 130782875283, - 85831394029, - 33456356318 - ], - "samples_ts": [ - 0.978721, - 1.4913, - 3.82588 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 556 - }, - { - "timestamp_utc": "2025-12-10T11:35:12.512505+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:15:55Z\",\n \"avg_ns\": 39597219152,\n \"stddev_ns\": 3170809201,\n \"avg_ts\": 3.261108,\n \"stddev_ts\": 0.386328,\n \"samples_ns\": [ 34527417332, 42147453656, 42116786469 ],\n \"samples_ts\": [ 3.7072, 3.03696, 3.03917 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:18:07Z\",\n \"avg_ns\": 341352908367,\n \"stddev_ns\": 3828608661,\n \"avg_ts\": 1.501093,\n \"stddev_ts\": 0.052031,\n \"samples_ns\": [ 348248979225, 347849481300, 327960264577 ],\n \"samples_ts\": [ 1.47021, 1.4719, 1.56116 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T11:15:55Z", - "avg_ns": 39597219152, - "stddev_ns": 3170809201, - "avg_ts": 3.261108, - "stddev_ts": 0.386328, - "samples_ns": [ - 34527417332, - 42147453656, - 42116786469 - ], - "samples_ts": [ - 3.7072, - 3.03696, - 3.03917 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T11:18:07Z", - "avg_ns": 341352908367, - "stddev_ns": 3828608661, - "avg_ts": 1.501093, - "stddev_ts": 0.052031, - "samples_ns": [ - 348248979225, - 347849481300, - 327960264577 - ], - "samples_ts": [ - 1.47021, - 1.4719, - 1.56116 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 557 - }, - { - "timestamp_utc": "2025-12-10T11:45:50.163303+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:35:13Z\",\n \"avg_ns\": 106563267546,\n \"stddev_ns\": 4276650501,\n \"avg_ts\": 5.645734,\n \"stddev_ts\": 2.990336,\n \"samples_ns\": [ 145880659778, 117317267409, 56491875453 ],\n \"samples_ts\": [ 3.50972, 4.36423, 9.06325 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:41:21Z\",\n \"avg_ns\": 89053945936,\n \"stddev_ns\": 4162335331,\n \"avg_ts\": 2.005253,\n \"stddev_ts\": 1.547116,\n \"samples_ns\": [ 131813656118, 101520275951, 33827905740 ],\n \"samples_ts\": [ 0.971068, 1.26083, 3.78386 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T11:35:13Z", - "avg_ns": 106563267546, - "stddev_ns": 4276650501, - "avg_ts": 5.645734, - "stddev_ts": 2.990336, - "samples_ns": [ - 145880659778, - 117317267409, - 56491875453 - ], - "samples_ts": [ - 3.50972, - 4.36423, - 9.06325 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T11:41:21Z", - "avg_ns": 89053945936, - "stddev_ns": 4162335331, - "avg_ts": 2.005253, - "stddev_ts": 1.547116, - "samples_ns": [ - 131813656118, - 101520275951, - 33827905740 - ], - "samples_ts": [ - 0.971068, - 1.26083, - 3.78386 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 558 - }, - { - "timestamp_utc": "2025-12-10T12:10:19.224259+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:45:51Z\",\n \"avg_ns\": 113711370829,\n \"stddev_ns\": 3552695697,\n \"avg_ts\": 5.845454,\n \"stddev_ts\": 4.060061,\n \"samples_ns\": [ 143672675238, 48608849088, 148852588162 ],\n \"samples_ts\": [ 3.56366, 10.5331, 3.43964 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:53:32Z\",\n \"avg_ns\": 335257716830,\n \"stddev_ns\": 805553967,\n \"avg_ts\": 1.530919,\n \"stddev_ts\": 0.094247,\n \"samples_ns\": [ 346506066603, 347022040585, 312245043303 ],\n \"samples_ts\": [ 1.47761, 1.47541, 1.63974 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T11:45:51Z", - "avg_ns": 113711370829, - "stddev_ns": 3552695697, - "avg_ts": 5.845454, - "stddev_ts": 4.060061, - "samples_ns": [ - 143672675238, - 48608849088, - 148852588162 - ], - "samples_ts": [ - 3.56366, - 10.5331, - 3.43964 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T11:53:32Z", - "avg_ns": 335257716830, - "stddev_ns": 805553967, - "avg_ts": 1.530919, - "stddev_ts": 0.094247, - "samples_ns": [ - 346506066603, - 347022040585, - 312245043303 - ], - "samples_ts": [ - 1.47761, - 1.47541, - 1.63974 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 559 - }, - { - "timestamp_utc": "2025-12-10T12:15:44.318774+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:10:20Z\",\n \"avg_ns\": 12537936457,\n \"stddev_ns\": 588496656,\n \"avg_ts\": 10.223637,\n \"stddev_ts\": 0.467233,\n \"samples_ns\": [ 12186371395, 12210102275, 13217335703 ],\n \"samples_ts\": [ 10.5035, 10.4831, 9.68425 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:11:09Z\",\n \"avg_ns\": 91270160442,\n \"stddev_ns\": 3860598298,\n \"avg_ts\": 1.988421,\n \"stddev_ts\": 1.597516,\n \"samples_ns\": [ 124795318124, 115616371866, 33398791337 ],\n \"samples_ts\": [ 1.02568, 1.10711, 3.83247 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T12:10:20Z", - "avg_ns": 12537936457, - "stddev_ns": 588496656, - "avg_ts": 10.223637, - "stddev_ts": 0.467233, - "samples_ns": [ - 12186371395, - 12210102275, - 13217335703 - ], - "samples_ts": [ - 10.5035, - 10.4831, - 9.68425 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T12:11:09Z", - "avg_ns": 91270160442, - "stddev_ns": 3860598298, - "avg_ts": 1.988421, - "stddev_ts": 1.597516, - "samples_ns": [ - 124795318124, - 115616371866, - 33398791337 - ], - "samples_ts": [ - 1.02568, - 1.10711, - 3.83247 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 560 - }, - { - "timestamp_utc": "2025-12-10T12:35:05.063794+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:15:45Z\",\n \"avg_ns\": 31399759114,\n \"stddev_ns\": 293801391,\n \"avg_ts\": 5.249530,\n \"stddev_ts\": 3.578793,\n \"samples_ns\": [ 13648429896, 38388170328, 42162677120 ],\n \"samples_ts\": [ 9.37837, 3.33436, 3.03586 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:17:31Z\",\n \"avg_ns\": 350615104604,\n \"stddev_ns\": 2503672059,\n \"avg_ts\": 1.460414,\n \"stddev_ts\": 0.016496,\n \"samples_ns\": [ 346096268789, 352454002274, 353295042751 ],\n \"samples_ts\": [ 1.47936, 1.45267, 1.44921 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T12:15:45Z", - "avg_ns": 31399759114, - "stddev_ns": 293801391, - "avg_ts": 5.24953, - "stddev_ts": 3.578793, - "samples_ns": [ - 13648429896, - 38388170328, - 42162677120 - ], - "samples_ts": [ - 9.37837, - 3.33436, - 3.03586 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T12:17:31Z", - "avg_ns": 350615104604, - "stddev_ns": 2503672059, - "avg_ts": 1.460414, - "stddev_ts": 0.016496, - "samples_ns": [ - 346096268789, - 352454002274, - 353295042751 - ], - "samples_ts": [ - 1.47936, - 1.45267, - 1.44921 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 561 - }, - { - "timestamp_utc": "2025-12-10T12:45:44.872140+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:35:06Z\",\n \"avg_ns\": 104231768254,\n \"stddev_ns\": 3431123056,\n \"avg_ts\": 6.051484,\n \"stddev_ts\": 3.742152,\n \"samples_ns\": [ 127900939808, 135424226216, 49370138738 ],\n \"samples_ts\": [ 4.0031, 3.78071, 10.3706 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:41:07Z\",\n \"avg_ns\": 92215661288,\n \"stddev_ns\": 3254229682,\n \"avg_ts\": 1.954339,\n \"stddev_ts\": 1.556008,\n \"samples_ns\": [ 123895062274, 118626471014, 34125450577 ],\n \"samples_ts\": [ 1.03313, 1.07902, 3.75087 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T12:35:06Z", - "avg_ns": 104231768254, - "stddev_ns": 3431123056, - "avg_ts": 6.051484, - "stddev_ts": 3.742152, - "samples_ns": [ - 127900939808, - 135424226216, - 49370138738 - ], - "samples_ts": [ - 4.0031, - 3.78071, - 10.3706 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T12:41:07Z", - "avg_ns": 92215661288, - "stddev_ns": 3254229682, - "avg_ts": 1.954339, - "stddev_ts": 1.556008, - "samples_ns": [ - 123895062274, - 118626471014, - 34125450577 - ], - "samples_ts": [ - 1.03313, - 1.07902, - 3.75087 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 562 - }, - { - "timestamp_utc": "2025-12-10T13:10:18.175104+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:45:45Z\",\n \"avg_ns\": 114808003795,\n \"stddev_ns\": 1454191006,\n \"avg_ts\": 5.807928,\n \"stddev_ts\": 4.042668,\n \"samples_ns\": [ 156360677061, 48900929466, 139162404859 ],\n \"samples_ts\": [ 3.27448, 10.4701, 3.67915 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:53:18Z\",\n \"avg_ns\": 339588950518,\n \"stddev_ns\": 634311477,\n \"avg_ts\": 1.511976,\n \"stddev_ts\": 0.100269,\n \"samples_ns\": [ 353169364970, 351032601425, 314564885159 ],\n \"samples_ts\": [ 1.44973, 1.45855, 1.62765 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T12:45:45Z", - "avg_ns": 114808003795, - "stddev_ns": 1454191006, - "avg_ts": 5.807928, - "stddev_ts": 4.042668, - "samples_ns": [ - 156360677061, - 48900929466, - 139162404859 - ], - "samples_ts": [ - 3.27448, - 10.4701, - 3.67915 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T12:53:18Z", - "avg_ns": 339588950518, - "stddev_ns": 634311477, - "avg_ts": 1.511976, - "stddev_ts": 0.100269, - "samples_ns": [ - 353169364970, - 351032601425, - 314564885159 - ], - "samples_ts": [ - 1.44973, - 1.45855, - 1.62765 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 563 - }, - { - "timestamp_utc": "2025-12-10T13:15:45.212228+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:10:19Z\",\n \"avg_ns\": 12358525100,\n \"stddev_ns\": 392407408,\n \"avg_ts\": 10.364064,\n \"stddev_ts\": 0.323184,\n \"samples_ns\": [ 12144636941, 12119532133, 12811406228 ],\n \"samples_ts\": [ 10.5396, 10.5615, 9.9911 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:11:08Z\",\n \"avg_ns\": 92078636602,\n \"stddev_ns\": 902487694,\n \"avg_ts\": 1.958394,\n \"stddev_ts\": 1.560054,\n \"samples_ns\": [ 124489067866, 117699426673, 34047415269 ],\n \"samples_ts\": [ 1.0282, 1.08752, 3.75946 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T13:10:19Z", - "avg_ns": 12358525100, - "stddev_ns": 392407408, - "avg_ts": 10.364064, - "stddev_ts": 0.323184, - "samples_ns": [ - 12144636941, - 12119532133, - 12811406228 - ], - "samples_ts": [ - 10.5396, - 10.5615, - 9.9911 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T13:11:08Z", - "avg_ns": 92078636602, - "stddev_ns": 902487694, - "avg_ts": 1.958394, - "stddev_ts": 1.560054, - "samples_ns": [ - 124489067866, - 117699426673, - 34047415269 - ], - "samples_ts": [ - 1.0282, - 1.08752, - 3.75946 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 564 - }, - { - "timestamp_utc": "2025-12-10T13:35:07.870768+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:15:46Z\",\n \"avg_ns\": 31977283537,\n \"stddev_ns\": 3976410975,\n \"avg_ts\": 5.084745,\n \"stddev_ts\": 3.374493,\n \"samples_ns\": [ 14254436556, 39574470381, 42102943674 ],\n \"samples_ts\": [ 8.97966, 3.23441, 3.04017 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:17:34Z\",\n \"avg_ns\": 350740455362,\n \"stddev_ns\": 3696040031,\n \"avg_ts\": 1.459877,\n \"stddev_ts\": 0.015462,\n \"samples_ns\": [ 353566749374, 352096741121, 346557875593 ],\n \"samples_ts\": [ 1.4481, 1.45415, 1.47739 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T13:15:46Z", - "avg_ns": 31977283537, - "stddev_ns": 3976410975, - "avg_ts": 5.084745, - "stddev_ts": 3.374493, - "samples_ns": [ - 14254436556, - 39574470381, - 42102943674 - ], - "samples_ts": [ - 8.97966, - 3.23441, - 3.04017 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T13:17:34Z", - "avg_ns": 350740455362, - "stddev_ns": 3696040031, - "avg_ts": 1.459877, - "stddev_ts": 0.015462, - "samples_ns": [ - 353566749374, - 352096741121, - 346557875593 - ], - "samples_ts": [ - 1.4481, - 1.45415, - 1.47739 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 565 - }, - { - "timestamp_utc": "2025-12-10T13:45:46.979919+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:35:08Z\",\n \"avg_ns\": 105245270678,\n \"stddev_ns\": 4063170791,\n \"avg_ts\": 5.915384,\n \"stddev_ts\": 3.533786,\n \"samples_ns\": [ 137161290045, 127335883604, 51238638385 ],\n \"samples_ts\": [ 3.73283, 4.02086, 9.99246 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:41:13Z\",\n \"avg_ns\": 90911457629,\n \"stddev_ns\": 4282075788,\n \"avg_ts\": 1.990128,\n \"stddev_ts\": 1.581878,\n \"samples_ns\": [ 129914747232, 109255126637, 33564499019 ],\n \"samples_ts\": [ 0.985262, 1.17157, 3.81355 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T13:35:08Z", - "avg_ns": 105245270678, - "stddev_ns": 4063170791, - "avg_ts": 5.915384, - "stddev_ts": 3.533786, - "samples_ns": [ - 137161290045, - 127335883604, - 51238638385 - ], - "samples_ts": [ - 3.73283, - 4.02086, - 9.99246 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T13:41:13Z", - "avg_ns": 90911457629, - "stddev_ns": 4282075788, - "avg_ts": 1.990128, - "stddev_ts": 1.581878, - "samples_ns": [ - 129914747232, - 109255126637, - 33564499019 - ], - "samples_ts": [ - 0.985262, - 1.17157, - 3.81355 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 566 - }, - { - "timestamp_utc": "2025-12-10T14:10:11.776053+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:45:47Z\",\n \"avg_ns\": 115481331628,\n \"stddev_ns\": 3384196718,\n \"avg_ts\": 5.796804,\n \"stddev_ts\": 4.081618,\n \"samples_ns\": [ 151580197706, 48718785717, 146145011461 ],\n \"samples_ts\": [ 3.37775, 10.5093, 3.50337 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:53:27Z\",\n \"avg_ns\": 334310186293,\n \"stddev_ns\": 2051411892,\n \"avg_ts\": 1.532495,\n \"stddev_ts\": 0.047968,\n \"samples_ns\": [ 340617688849, 339864301639, 322448568393 ],\n \"samples_ts\": [ 1.50315, 1.50648, 1.58785 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T13:45:47Z", - "avg_ns": 115481331628, - "stddev_ns": 3384196718, - "avg_ts": 5.796804, - "stddev_ts": 4.081618, - "samples_ns": [ - 151580197706, - 48718785717, - 146145011461 - ], - "samples_ts": [ - 3.37775, - 10.5093, - 3.50337 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T13:53:27Z", - "avg_ns": 334310186293, - "stddev_ns": 2051411892, - "avg_ts": 1.532495, - "stddev_ts": 0.047968, - "samples_ns": [ - 340617688849, - 339864301639, - 322448568393 - ], - "samples_ts": [ - 1.50315, - 1.50648, - 1.58785 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 567 - }, - { - "timestamp_utc": "2025-12-10T14:15:39.719278+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:10:12Z\",\n \"avg_ns\": 12164330873,\n \"stddev_ns\": 25042231,\n \"avg_ts\": 10.522598,\n \"stddev_ts\": 0.021686,\n \"samples_ns\": [ 12181577680, 12175807518, 12135607422 ],\n \"samples_ts\": [ 10.5077, 10.5126, 10.5475 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:11:01Z\",\n \"avg_ns\": 92619322076,\n \"stddev_ns\": 928783954,\n \"avg_ts\": 1.917708,\n \"stddev_ts\": 1.476711,\n \"samples_ns\": [ 108472121242, 134006377726, 35379467261 ],\n \"samples_ts\": [ 1.18003, 0.955178, 3.61792 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T14:10:12Z", - "avg_ns": 12164330873, - "stddev_ns": 25042231, - "avg_ts": 10.522598, - "stddev_ts": 0.021686, - "samples_ns": [ - 12181577680, - 12175807518, - 12135607422 - ], - "samples_ts": [ - 10.5077, - 10.5126, - 10.5475 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T14:11:01Z", - "avg_ns": 92619322076, - "stddev_ns": 928783954, - "avg_ts": 1.917708, - "stddev_ts": 1.476711, - "samples_ns": [ - 108472121242, - 134006377726, - 35379467261 - ], - "samples_ts": [ - 1.18003, - 0.955178, - 3.61792 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 568 - }, - { - "timestamp_utc": "2025-12-10T14:34:39.220683+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:15:40Z\",\n \"avg_ns\": 27304779469,\n \"stddev_ns\": 1780390185,\n \"avg_ts\": 6.070139,\n \"stddev_ts\": 3.955565,\n \"samples_ns\": [ 12138374841, 27675780590, 42100182977 ],\n \"samples_ts\": [ 10.5451, 4.62498, 3.04037 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:17:14Z\",\n \"avg_ns\": 347645535700,\n \"stddev_ns\": 351118265,\n \"avg_ts\": 1.472765,\n \"stddev_ts\": 0.001488,\n \"samples_ns\": [ 347245226005, 347901351555, 347790029540 ],\n \"samples_ts\": [ 1.47446, 1.47168, 1.47215 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T14:15:40Z", - "avg_ns": 27304779469, - "stddev_ns": 1780390185, - "avg_ts": 6.070139, - "stddev_ts": 3.955565, - "samples_ns": [ - 12138374841, - 27675780590, - 42100182977 - ], - "samples_ts": [ - 10.5451, - 4.62498, - 3.04037 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T14:17:14Z", - "avg_ns": 347645535700, - "stddev_ns": 351118265, - "avg_ts": 1.472765, - "stddev_ts": 0.001488, - "samples_ns": [ - 347245226005, - 347901351555, - 347790029540 - ], - "samples_ts": [ - 1.47446, - 1.47168, - 1.47215 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 569 - }, - { - "timestamp_utc": "2025-12-10T14:45:37.587148+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:34:40Z\",\n \"avg_ns\": 104019300526,\n \"stddev_ns\": 4251608186,\n \"avg_ts\": 6.186438,\n \"stddev_ts\": 3.834540,\n \"samples_ns\": [ 108597442243, 154878216264, 48582243071 ],\n \"samples_ts\": [ 4.71466, 3.30582, 10.5388 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:40:59Z\",\n \"avg_ns\": 92692873657,\n \"stddev_ns\": 4176178178,\n \"avg_ts\": 1.785929,\n \"stddev_ts\": 1.190746,\n \"samples_ns\": [ 99986573451, 137406391449, 40685656073 ],\n \"samples_ts\": [ 1.28017, 0.931543, 3.14607 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T14:34:40Z", - "avg_ns": 104019300526, - "stddev_ns": 4251608186, - "avg_ts": 6.186438, - "stddev_ts": 3.83454, - "samples_ns": [ - 108597442243, - 154878216264, - 48582243071 - ], - "samples_ts": [ - 4.71466, - 3.30582, - 10.5388 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T14:40:59Z", - "avg_ns": 92692873657, - "stddev_ns": 4176178178, - "avg_ts": 1.785929, - "stddev_ts": 1.190746, - "samples_ns": [ - 99986573451, - 137406391449, - 40685656073 - ], - "samples_ts": [ - 1.28017, - 0.931543, - 3.14607 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 570 - }, - { - "timestamp_utc": "2025-12-10T15:09:48.532362+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:45:38Z\",\n \"avg_ns\": 113956732426,\n \"stddev_ns\": 4023771968,\n \"avg_ts\": 5.450458,\n \"stddev_ts\": 3.085048,\n \"samples_ns\": [ 166714710193, 57295568635, 117859918451 ],\n \"samples_ts\": [ 3.07111, 8.93612, 4.34414 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:52:48Z\",\n \"avg_ns\": 339515242700,\n \"stddev_ns\": 1241267665,\n \"avg_ts\": 1.508046,\n \"stddev_ts\": 0.005516,\n \"samples_ns\": [ 340705789650, 339611123006, 338228815445 ],\n \"samples_ts\": [ 1.50276, 1.50761, 1.51377 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T14:45:38Z", - "avg_ns": 113956732426, - "stddev_ns": 4023771968, - "avg_ts": 5.450458, - "stddev_ts": 3.085048, - "samples_ns": [ - 166714710193, - 57295568635, - 117859918451 - ], - "samples_ts": [ - 3.07111, - 8.93612, - 4.34414 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T14:52:48Z", - "avg_ns": 339515242700, - "stddev_ns": 1241267665, - "avg_ts": 1.508046, - "stddev_ts": 0.005516, - "samples_ns": [ - 340705789650, - 339611123006, - 338228815445 - ], - "samples_ts": [ - 1.50276, - 1.50761, - 1.51377 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 571 - }, - { - "timestamp_utc": "2025-12-10T15:15:27.907910+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:09:50Z\",\n \"avg_ns\": 12170000515,\n \"stddev_ns\": 36688025,\n \"avg_ts\": 10.517730,\n \"stddev_ts\": 0.031717,\n \"samples_ns\": [ 12132096632, 12172567753, 12205337162 ],\n \"samples_ts\": [ 10.5505, 10.5154, 10.4872 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:10:49Z\",\n \"avg_ns\": 92497267805,\n \"stddev_ns\": 4233275682,\n \"avg_ts\": 1.505948,\n \"stddev_ts\": 0.477660,\n \"samples_ns\": [ 74613244205, 133426732188, 69451827024 ],\n \"samples_ts\": [ 1.71551, 0.959328, 1.843 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T15:09:50Z", - "avg_ns": 12170000515, - "stddev_ns": 36688025, - "avg_ts": 10.51773, - "stddev_ts": 0.031717, - "samples_ns": [ - 12132096632, - 12172567753, - 12205337162 - ], - "samples_ts": [ - 10.5505, - 10.5154, - 10.4872 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T15:10:49Z", - "avg_ns": 92497267805, - "stddev_ns": 4233275682, - "avg_ts": 1.505948, - "stddev_ts": 0.47766, - "samples_ns": [ - 74613244205, - 133426732188, - 69451827024 - ], - "samples_ts": [ - 1.71551, - 0.959328, - 1.843 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 572 - }, - { - "timestamp_utc": "2025-12-10T15:33:59.515358+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:15:28Z\",\n \"avg_ns\": 17672462454,\n \"stddev_ns\": 4015700185,\n \"avg_ts\": 8.486484,\n \"stddev_ts\": 3.476590,\n \"samples_ns\": [ 12224787299, 12171037071, 28621562994 ],\n \"samples_ts\": [ 10.4705, 10.5168, 4.47215 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:16:34Z\",\n \"avg_ns\": 347984130654,\n \"stddev_ns\": 586236466,\n \"avg_ts\": 1.471334,\n \"stddev_ts\": 0.002476,\n \"samples_ns\": [ 347636901135, 348660981879, 347654508948 ],\n \"samples_ts\": [ 1.4728, 1.46848, 1.47273 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T15:15:28Z", - "avg_ns": 17672462454, - "stddev_ns": 4015700185, - "avg_ts": 8.486484, - "stddev_ts": 3.47659, - "samples_ns": [ - 12224787299, - 12171037071, - 28621562994 - ], - "samples_ts": [ - 10.4705, - 10.5168, - 4.47215 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T15:16:34Z", - "avg_ns": 347984130654, - "stddev_ns": 586236466, - "avg_ts": 1.471334, - "stddev_ts": 0.002476, - "samples_ns": [ - 347636901135, - 348660981879, - 347654508948 - ], - "samples_ts": [ - 1.4728, - 1.46848, - 1.47273 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 573 - }, - { - "timestamp_utc": "2025-12-10T15:45:23.988159+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:34:01Z\",\n \"avg_ns\": 104196150271,\n \"stddev_ns\": 1639481184,\n \"avg_ts\": 5.770751,\n \"stddev_ts\": 2.484954,\n \"samples_ns\": [ 81516989510, 166758273862, 64313187441 ],\n \"samples_ts\": [ 6.2809, 3.07031, 7.96104 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:40:48Z\",\n \"avg_ns\": 91742002367,\n \"stddev_ns\": 3550029161,\n \"avg_ts\": 1.513045,\n \"stddev_ts\": 0.480521,\n \"samples_ns\": [ 66826922249, 130481283740, 77917801114 ],\n \"samples_ts\": [ 1.9154, 0.980984, 1.64276 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T15:34:01Z", - "avg_ns": 104196150271, - "stddev_ns": 1639481184, - "avg_ts": 5.770751, - "stddev_ts": 2.484954, - "samples_ns": [ - 81516989510, - 166758273862, - 64313187441 - ], - "samples_ts": [ - 6.2809, - 3.07031, - 7.96104 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T15:40:48Z", - "avg_ns": 91742002367, - "stddev_ns": 3550029161, - "avg_ts": 1.513045, - "stddev_ts": 0.480521, - "samples_ns": [ - 66826922249, - 130481283740, - 77917801114 - ], - "samples_ts": [ - 1.9154, - 0.980984, - 1.64276 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 574 - }, - { - "timestamp_utc": "2025-12-10T16:09:02.329621+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:45:24Z\",\n \"avg_ns\": 114687631451,\n \"stddev_ns\": 1990455047,\n \"avg_ts\": 4.885034,\n \"stddev_ts\": 1.584212,\n \"samples_ns\": [ 167440287332, 89456713809, 87165893212 ],\n \"samples_ts\": [ 3.05781, 5.72344, 5.87386 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:52:05Z\",\n \"avg_ns\": 338567880434,\n \"stddev_ns\": 620057230,\n \"avg_ts\": 1.512256,\n \"stddev_ts\": 0.002768,\n \"samples_ns\": [ 339251093080, 338411707801, 338040840423 ],\n \"samples_ts\": [ 1.50921, 1.51295, 1.51461 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T15:45:24Z", - "avg_ns": 114687631451, - "stddev_ns": 1990455047, - "avg_ts": 4.885034, - "stddev_ts": 1.584212, - "samples_ns": [ - 167440287332, - 89456713809, - 87165893212 - ], - "samples_ts": [ - 3.05781, - 5.72344, - 5.87386 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_type": "gemma3 4B Q8_0", - "model_size": 4123860992, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T15:52:05Z", - "avg_ns": 338567880434, - "stddev_ns": 620057230, - "avg_ts": 1.512256, - "stddev_ts": 0.002768, - "samples_ns": [ - 339251093080, - 338411707801, - 338040840423 - ], - "samples_ts": [ - 1.50921, - 1.51295, - 1.51461 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q8_0", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 575 - }, - { - "timestamp_utc": "2025-12-10T16:15:02.475707+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:09:16Z\",\n \"avg_ns\": 29148844290,\n \"stddev_ns\": 5531485,\n \"avg_ts\": 4.391255,\n \"stddev_ts\": 0.000833,\n \"samples_ns\": [ 29152693084, 29142512046, 29151327742 ],\n \"samples_ts\": [ 4.39067, 4.39221, 4.39088 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:11:34Z\",\n \"avg_ns\": 69137713024,\n \"stddev_ns\": 1980839746,\n \"avg_ts\": 1.956380,\n \"stddev_ts\": 0.602425,\n \"samples_ns\": [ 48265483342, 79508494977, 79639160754 ],\n \"samples_ts\": [ 2.652, 1.60989, 1.60725 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T16:09:16Z", - "avg_ns": 29148844290, - "stddev_ns": 5531485, - "avg_ts": 4.391255, - "stddev_ts": 0.000833, - "samples_ns": [ - 29152693084, - 29142512046, - 29151327742 - ], - "samples_ts": [ - 4.39067, - 4.39221, - 4.39088 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T16:11:34Z", - "avg_ns": 69137713024, - "stddev_ns": 1980839746, - "avg_ts": 1.95638, - "stddev_ts": 0.602425, - "samples_ns": [ - 48265483342, - 79508494977, - 79639160754 - ], - "samples_ts": [ - 2.652, - 1.60989, - 1.60725 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 576 - }, - { - "timestamp_utc": "2025-12-10T16:28:51.009347+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:15:03Z\",\n \"avg_ns\": 29231107599,\n \"stddev_ns\": 59142795,\n \"avg_ts\": 4.378909,\n \"stddev_ts\": 0.008868,\n \"samples_ns\": [ 29164341878, 29276925257, 29252055662 ],\n \"samples_ts\": [ 4.38892, 4.37204, 4.37576 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:17:01Z\",\n \"avg_ns\": 236197011048,\n \"stddev_ns\": 3397857119,\n \"avg_ts\": 2.177303,\n \"stddev_ts\": 0.181479,\n \"samples_ns\": [ 246964907730, 247117954585, 214508170829 ],\n \"samples_ts\": [ 2.07317, 2.07189, 2.38686 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T16:15:03Z", - "avg_ns": 29231107599, - "stddev_ns": 59142795, - "avg_ts": 4.378909, - "stddev_ts": 0.008868, - "samples_ns": [ - 29164341878, - 29276925257, - 29252055662 - ], - "samples_ts": [ - 4.38892, - 4.37204, - 4.37576 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T16:17:01Z", - "avg_ns": 236197011048, - "stddev_ns": 3397857119, - "avg_ts": 2.177303, - "stddev_ts": 0.181479, - "samples_ns": [ - 246964907730, - 247117954585, - 214508170829 - ], - "samples_ts": [ - 2.07317, - 2.07189, - 2.38686 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 577 - }, - { - "timestamp_utc": "2025-12-10T16:41:58.677050+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:28:52Z\",\n \"avg_ns\": 160583762245,\n \"stddev_ns\": 3366089379,\n \"avg_ts\": 3.211049,\n \"stddev_ts\": 0.338798,\n \"samples_ns\": [ 166166670312, 142397992518, 173186623906 ],\n \"samples_ts\": [ 3.08124, 3.59556, 2.95635 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:39:23Z\",\n \"avg_ns\": 51451459191,\n \"stddev_ns\": 1443440715,\n \"avg_ts\": 2.552551,\n \"stddev_ts\": 0.470732,\n \"samples_ns\": [ 63713198037, 45284453824, 45356725714 ],\n \"samples_ts\": [ 2.009, 2.82658, 2.82207 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T16:28:52Z", - "avg_ns": 160583762245, - "stddev_ns": 3366089379, - "avg_ts": 3.211049, - "stddev_ts": 0.338798, - "samples_ns": [ - 166166670312, - 142397992518, - 173186623906 - ], - "samples_ts": [ - 3.08124, - 3.59556, - 2.95635 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T16:39:23Z", - "avg_ns": 51451459191, - "stddev_ns": 1443440715, - "avg_ts": 2.552551, - "stddev_ts": 0.470732, - "samples_ns": [ - 63713198037, - 45284453824, - 45356725714 - ], - "samples_ts": [ - 2.009, - 2.82658, - 2.82207 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 578 - }, - { - "timestamp_utc": "2025-12-10T17:05:12.189955+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:41:59Z\",\n \"avg_ns\": 142078510495,\n \"stddev_ns\": 3847687929,\n \"avg_ts\": 3.798493,\n \"stddev_ts\": 0.972715,\n \"samples_ns\": [ 117072353376, 191372956262, 117790221849 ],\n \"samples_ts\": [ 4.37336, 2.6754, 4.34671 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:52:17Z\",\n \"avg_ns\": 257965023070,\n \"stddev_ns\": 773771921,\n \"avg_ts\": 1.984777,\n \"stddev_ts\": 0.005955,\n \"samples_ns\": [ 257164470669, 258021700847, 258708897694 ],\n \"samples_ts\": [ 1.99094, 1.98433, 1.97906 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T16:41:59Z", - "avg_ns": 142078510495, - "stddev_ns": 3847687929, - "avg_ts": 3.798493, - "stddev_ts": 0.972715, - "samples_ns": [ - 117072353376, - 191372956262, - 117790221849 - ], - "samples_ts": [ - 4.37336, - 2.6754, - 4.34671 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T16:52:17Z", - "avg_ns": 257965023070, - "stddev_ns": 773771921, - "avg_ts": 1.984777, - "stddev_ts": 0.005955, - "samples_ns": [ - 257164470669, - 258021700847, - 258708897694 - ], - "samples_ts": [ - 1.99094, - 1.98433, - 1.97906 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 579 - }, - { - "timestamp_utc": "2025-12-10T17:10:40.794136+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:05:12Z\",\n \"avg_ns\": 29263489202,\n \"stddev_ns\": 72454359,\n \"avg_ts\": 4.374069,\n \"stddev_ts\": 0.010818,\n \"samples_ns\": [ 29206357700, 29344985358, 29239124548 ],\n \"samples_ts\": [ 4.38261, 4.3619, 4.3777 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:07:10Z\",\n \"avg_ns\": 70071223786,\n \"stddev_ns\": 1849050261,\n \"avg_ts\": 1.889014,\n \"stddev_ts\": 0.446457,\n \"samples_ns\": [ 76407382423, 80521013756, 53285275180 ],\n \"samples_ts\": [ 1.67523, 1.58965, 2.40216 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T17:05:12Z", - "avg_ns": 29263489202, - "stddev_ns": 72454359, - "avg_ts": 4.374069, - "stddev_ts": 0.010818, - "samples_ns": [ - 29206357700, - 29344985358, - 29239124548 - ], - "samples_ts": [ - 4.38261, - 4.3619, - 4.3777 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T17:07:10Z", - "avg_ns": 70071223786, - "stddev_ns": 1849050261, - "avg_ts": 1.889014, - "stddev_ts": 0.446457, - "samples_ns": [ - 76407382423, - 80521013756, - 53285275180 - ], - "samples_ts": [ - 1.67523, - 1.58965, - 2.40216 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 580 - }, - { - "timestamp_utc": "2025-12-10T17:25:09.021897+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:10:41Z\",\n \"avg_ns\": 35100270962,\n \"stddev_ns\": 1598366816,\n \"avg_ts\": 3.831382,\n \"stddev_ts\": 0.953473,\n \"samples_ns\": [ 29085943913, 29339152360, 46875716614 ],\n \"samples_ts\": [ 4.40075, 4.36277, 2.73062 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:12:56Z\",\n \"avg_ns\": 244001589812,\n \"stddev_ns\": 4264898738,\n \"avg_ts\": 2.101011,\n \"stddev_ts\": 0.092784,\n \"samples_ns\": [ 231871309673, 249811173266, 250322286498 ],\n \"samples_ts\": [ 2.20812, 2.04955, 2.04536 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T17:10:41Z", - "avg_ns": 35100270962, - "stddev_ns": 1598366816, - "avg_ts": 3.831382, - "stddev_ts": 0.953473, - "samples_ns": [ - 29085943913, - 29339152360, - 46875716614 - ], - "samples_ts": [ - 4.40075, - 4.36277, - 2.73062 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T17:12:56Z", - "avg_ns": 244001589812, - "stddev_ns": 4264898738, - "avg_ts": 2.101011, - "stddev_ts": 0.092784, - "samples_ns": [ - 231871309673, - 249811173266, - 250322286498 - ], - "samples_ts": [ - 2.20812, - 2.04955, - 2.04536 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 581 - }, - { - "timestamp_utc": "2025-12-10T17:38:05.570322+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:25:09Z\",\n \"avg_ns\": 167061706463,\n \"stddev_ns\": 3436018349,\n \"avg_ts\": 3.223639,\n \"stddev_ts\": 0.948038,\n \"samples_ns\": [ 191864088608, 118565110020, 190755920762 ],\n \"samples_ts\": [ 2.66856, 4.3183, 2.68406 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:35:28Z\",\n \"avg_ns\": 52129954475,\n \"stddev_ns\": 3473918482,\n \"avg_ts\": 2.542762,\n \"stddev_ts\": 0.540655,\n \"samples_ns\": [ 44775433076, 44895051161, 66719379188 ],\n \"samples_ts\": [ 2.85871, 2.85109, 1.91848 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T17:25:09Z", - "avg_ns": 167061706463, - "stddev_ns": 3436018349, - "avg_ts": 3.223639, - "stddev_ts": 0.948038, - "samples_ns": [ - 191864088608, - 118565110020, - 190755920762 - ], - "samples_ts": [ - 2.66856, - 4.3183, - 2.68406 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T17:35:28Z", - "avg_ns": 52129954475, - "stddev_ns": 3473918482, - "avg_ts": 2.542762, - "stddev_ts": 0.540655, - "samples_ns": [ - 44775433076, - 44895051161, - 66719379188 - ], - "samples_ts": [ - 2.85871, - 2.85109, - 1.91848 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 582 - }, - { - "timestamp_utc": "2025-12-10T18:00:21.892874+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:38:07Z\",\n \"avg_ns\": 153993016899,\n \"stddev_ns\": 4075979347,\n \"avg_ts\": 3.330654,\n \"stddev_ts\": 0.170026,\n \"samples_ns\": [ 146566696624, 162328708059, 153083646015 ],\n \"samples_ts\": [ 3.49329, 3.15409, 3.34458 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:48:38Z\",\n \"avg_ns\": 234273877733,\n \"stddev_ns\": 4254926311,\n \"avg_ts\": 2.197664,\n \"stddev_ts\": 0.205791,\n \"samples_ns\": [ 210241869219, 246345236902, 246234527080 ],\n \"samples_ts\": [ 2.43529, 2.07838, 2.07932 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T17:38:07Z", - "avg_ns": 153993016899, - "stddev_ns": 4075979347, - "avg_ts": 3.330654, - "stddev_ts": 0.170026, - "samples_ns": [ - 146566696624, - 162328708059, - 153083646015 - ], - "samples_ts": [ - 3.49329, - 3.15409, - 3.34458 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T17:48:38Z", - "avg_ns": 234273877733, - "stddev_ns": 4254926311, - "avg_ts": 2.197664, - "stddev_ts": 0.205791, - "samples_ns": [ - 210241869219, - 246345236902, - 246234527080 - ], - "samples_ts": [ - 2.43529, - 2.07838, - 2.07932 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 583 - }, - { - "timestamp_utc": "2025-12-10T18:05:51.125178+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:00:22Z\",\n \"avg_ns\": 31070754584,\n \"stddev_ns\": 2556392572,\n \"avg_ts\": 4.137456,\n \"stddev_ts\": 0.324997,\n \"samples_ns\": [ 29635184193, 29554821753, 34022257806 ],\n \"samples_ts\": [ 4.31919, 4.33093, 3.76224 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:02:25Z\",\n \"avg_ns\": 68259534222,\n \"stddev_ns\": 4092100122,\n \"avg_ts\": 2.015621,\n \"stddev_ts\": 0.715223,\n \"samples_ns\": [ 79803253964, 79928540435, 45046808269 ],\n \"samples_ts\": [ 1.60394, 1.60143, 2.84149 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T18:00:22Z", - "avg_ns": 31070754584, - "stddev_ns": 2556392572, - "avg_ts": 4.137456, - "stddev_ts": 0.324997, - "samples_ns": [ - 29635184193, - 29554821753, - 34022257806 - ], - "samples_ts": [ - 4.31919, - 4.33093, - 3.76224 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T18:02:25Z", - "avg_ns": 68259534222, - "stddev_ns": 4092100122, - "avg_ts": 2.015621, - "stddev_ts": 0.715223, - "samples_ns": [ - 79803253964, - 79928540435, - 45046808269 - ], - "samples_ts": [ - 1.60394, - 1.60143, - 2.84149 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 584 - }, - { - "timestamp_utc": "2025-12-10T18:20:14.260910+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:05:51Z\",\n \"avg_ns\": 37876261177,\n \"stddev_ns\": 4013651336,\n \"avg_ts\": 3.587677,\n \"stddev_ts\": 0.985454,\n \"samples_ns\": [ 29366359272, 32595868261, 51666555999 ],\n \"samples_ts\": [ 4.35873, 3.92688, 2.47742 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:08:14Z\",\n \"avg_ns\": 239547716073,\n \"stddev_ns\": 1101134169,\n \"avg_ts\": 2.143352,\n \"stddev_ts\": 0.141400,\n \"samples_ns\": [ 221973109854, 248718046030, 247951992335 ],\n \"samples_ts\": [ 2.30659, 2.05856, 2.06492 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T18:05:51Z", - "avg_ns": 37876261177, - "stddev_ns": 4013651336, - "avg_ts": 3.587677, - "stddev_ts": 0.985454, - "samples_ns": [ - 29366359272, - 32595868261, - 51666555999 - ], - "samples_ts": [ - 4.35873, - 3.92688, - 2.47742 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T18:08:14Z", - "avg_ns": 239547716073, - "stddev_ns": 1101134169, - "avg_ts": 2.143352, - "stddev_ts": 0.1414, - "samples_ns": [ - 221973109854, - 248718046030, - 247951992335 - ], - "samples_ts": [ - 2.30659, - 2.05856, - 2.06492 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 585 - }, - { - "timestamp_utc": "2025-12-10T18:33:07.033883+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:20:15Z\",\n \"avg_ns\": 167365779701,\n \"stddev_ns\": 1513785278,\n \"avg_ts\": 3.184489,\n \"stddev_ts\": 0.829035,\n \"samples_ns\": [ 192050827512, 123652506768, 186394004823 ],\n \"samples_ts\": [ 2.66596, 4.14064, 2.74687 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:30:35Z\",\n \"avg_ns\": 50376340816,\n \"stddev_ns\": 4091904551,\n \"avg_ts\": 2.645667,\n \"stddev_ts\": 0.601094,\n \"samples_ns\": [ 42767315692, 42773923035, 65587783723 ],\n \"samples_ts\": [ 2.99294, 2.99248, 1.95158 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T18:20:15Z", - "avg_ns": 167365779701, - "stddev_ns": 1513785278, - "avg_ts": 3.184489, - "stddev_ts": 0.829035, - "samples_ns": [ - 192050827512, - 123652506768, - 186394004823 - ], - "samples_ts": [ - 2.66596, - 4.14064, - 2.74687 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T18:30:35Z", - "avg_ns": 50376340816, - "stddev_ns": 4091904551, - "avg_ts": 2.645667, - "stddev_ts": 0.601094, - "samples_ns": [ - 42767315692, - 42773923035, - 65587783723 - ], - "samples_ts": [ - 2.99294, - 2.99248, - 1.95158 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 586 - }, - { - "timestamp_utc": "2025-12-10T18:56:06.236679+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:33:08Z\",\n \"avg_ns\": 155055719171,\n \"stddev_ns\": 2863146145,\n \"avg_ts\": 3.306190,\n \"stddev_ts\": 0.143822,\n \"samples_ns\": [ 148164549366, 161579441178, 155423166969 ],\n \"samples_ts\": [ 3.45562, 3.16872, 3.29423 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:43:42Z\",\n \"avg_ns\": 247631291769,\n \"stddev_ns\": 3496395705,\n \"avg_ts\": 2.073882,\n \"stddev_ts\": 0.142136,\n \"samples_ns\": [ 229106247484, 253274939689, 260512688134 ],\n \"samples_ts\": [ 2.23477, 2.02152, 1.96536 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T18:33:08Z", - "avg_ns": 155055719171, - "stddev_ns": 2863146145, - "avg_ts": 3.30619, - "stddev_ts": 0.143822, - "samples_ns": [ - 148164549366, - 161579441178, - 155423166969 - ], - "samples_ts": [ - 3.45562, - 3.16872, - 3.29423 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T18:43:42Z", - "avg_ns": 247631291769, - "stddev_ns": 3496395705, - "avg_ts": 2.073882, - "stddev_ts": 0.142136, - "samples_ns": [ - 229106247484, - 253274939689, - 260512688134 - ], - "samples_ts": [ - 2.23477, - 2.02152, - 1.96536 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 587 - }, - { - "timestamp_utc": "2025-12-10T19:01:35.280149+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:56:07Z\",\n \"avg_ns\": 41713328628,\n \"stddev_ns\": 3625890262,\n \"avg_ts\": 3.251993,\n \"stddev_ts\": 1.003260,\n \"samples_ns\": [ 29214415770, 43977336569, 51948233546 ],\n \"samples_ts\": [ 4.3814, 2.91059, 2.46399 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:58:41Z\",\n \"avg_ns\": 57723258993,\n \"stddev_ns\": 3811866383,\n \"avg_ts\": 2.391182,\n \"stddev_ts\": 0.718134,\n \"samples_ns\": [ 81903781436, 46164213097, 45101782448 ],\n \"samples_ts\": [ 1.56281, 2.77271, 2.83803 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T18:56:07Z", - "avg_ns": 41713328628, - "stddev_ns": 3625890262, - "avg_ts": 3.251993, - "stddev_ts": 1.00326, - "samples_ns": [ - 29214415770, - 43977336569, - 51948233546 - ], - "samples_ts": [ - 4.3814, - 2.91059, - 2.46399 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T18:58:41Z", - "avg_ns": 57723258993, - "stddev_ns": 3811866383, - "avg_ts": 2.391182, - "stddev_ts": 0.718134, - "samples_ns": [ - 81903781436, - 46164213097, - 45101782448 - ], - "samples_ts": [ - 1.56281, - 2.77271, - 2.83803 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 588 - }, - { - "timestamp_utc": "2025-12-10T19:16:29.590070+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:01:36Z\",\n \"avg_ns\": 49212678274,\n \"stddev_ns\": 3267083054,\n \"avg_ts\": 2.616072,\n \"stddev_ts\": 0.250167,\n \"samples_ns\": [ 44064253589, 51654617734, 51919163500 ],\n \"samples_ts\": [ 2.90485, 2.478, 2.46537 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:04:33Z\",\n \"avg_ns\": 238579252373,\n \"stddev_ns\": 3212791807,\n \"avg_ts\": 2.153672,\n \"stddev_ts\": 0.154041,\n \"samples_ns\": [ 226211693800, 230621747087, 258904316233 ],\n \"samples_ts\": [ 2.26337, 2.22009, 1.97756 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T19:01:36Z", - "avg_ns": 49212678274, - "stddev_ns": 3267083054, - "avg_ts": 2.616072, - "stddev_ts": 0.250167, - "samples_ns": [ - 44064253589, - 51654617734, - 51919163500 - ], - "samples_ts": [ - 2.90485, - 2.478, - 2.46537 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T19:04:33Z", - "avg_ns": 238579252373, - "stddev_ns": 3212791807, - "avg_ts": 2.153672, - "stddev_ts": 0.154041, - "samples_ns": [ - 226211693800, - 230621747087, - 258904316233 - ], - "samples_ts": [ - 2.26337, - 2.22009, - 1.97756 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 589 - }, - { - "timestamp_utc": "2025-12-10T19:30:29.528816+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:16:30Z\",\n \"avg_ns\": 149789005317,\n \"stddev_ns\": 603031066,\n \"avg_ts\": 3.520857,\n \"stddev_ts\": 0.700974,\n \"samples_ns\": [ 136248598622, 187400975584, 125717441745 ],\n \"samples_ts\": [ 3.75784, 2.73211, 4.07263 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:26:56Z\",\n \"avg_ns\": 70769043092,\n \"stddev_ns\": 1389338108,\n \"avg_ts\": 1.830306,\n \"stddev_ts\": 0.236642,\n \"samples_ns\": [ 67298600436, 81730910700, 63277618140 ],\n \"samples_ts\": [ 1.90197, 1.56611, 2.02283 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T19:16:30Z", - "avg_ns": 149789005317, - "stddev_ns": 603031066, - "avg_ts": 3.520857, - "stddev_ts": 0.700974, - "samples_ns": [ - 136248598622, - 187400975584, - 125717441745 - ], - "samples_ts": [ - 3.75784, - 2.73211, - 4.07263 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T19:26:56Z", - "avg_ns": 70769043092, - "stddev_ns": 1389338108, - "avg_ts": 1.830306, - "stddev_ts": 0.236642, - "samples_ns": [ - 67298600436, - 81730910700, - 63277618140 - ], - "samples_ts": [ - 1.90197, - 1.56611, - 2.02283 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 590 - }, - { - "timestamp_utc": "2025-12-10T19:51:55.389573+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:30:30Z\",\n \"avg_ns\": 163813370318,\n \"stddev_ns\": 2059099829,\n \"avg_ts\": 3.189356,\n \"stddev_ts\": 0.579789,\n \"samples_ns\": [ 182521519227, 132770816061, 176147775667 ],\n \"samples_ts\": [ 2.80515, 3.85627, 2.90665 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:40:47Z\",\n \"avg_ns\": 222321303646,\n \"stddev_ns\": 2538118437,\n \"avg_ts\": 2.313729,\n \"stddev_ts\": 0.192571,\n \"samples_ns\": [ 241743732413, 204561253574, 220658924952 ],\n \"samples_ts\": [ 2.11795, 2.50292, 2.32032 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T19:30:30Z", - "avg_ns": 163813370318, - "stddev_ns": 2059099829, - "avg_ts": 3.189356, - "stddev_ts": 0.579789, - "samples_ns": [ - 182521519227, - 132770816061, - 176147775667 - ], - "samples_ts": [ - 2.80515, - 3.85627, - 2.90665 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T19:40:47Z", - "avg_ns": 222321303646, - "stddev_ns": 2538118437, - "avg_ts": 2.313729, - "stddev_ts": 0.192571, - "samples_ns": [ - 241743732413, - 204561253574, - 220658924952 - ], - "samples_ts": [ - 2.11795, - 2.50292, - 2.32032 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 591 - }, - { - "timestamp_utc": "2025-12-10T19:57:28.834171+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:51:56Z\",\n \"avg_ns\": 51576809610,\n \"stddev_ns\": 571299173,\n \"avg_ts\": 2.481940,\n \"stddev_ts\": 0.027627,\n \"samples_ns\": [ 52029800607, 51765623710, 50935004514 ],\n \"samples_ts\": [ 2.46013, 2.47268, 2.51301 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:55:07Z\",\n \"avg_ns\": 46865923492,\n \"stddev_ns\": 1224684492,\n \"avg_ts\": 2.739767,\n \"stddev_ts\": 0.184018,\n \"samples_ns\": [ 45022675682, 44928342564, 50646752230 ],\n \"samples_ts\": [ 2.84301, 2.84898, 2.52731 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T19:51:56Z", - "avg_ns": 51576809610, - "stddev_ns": 571299173, - "avg_ts": 2.48194, - "stddev_ts": 0.027627, - "samples_ns": [ - 52029800607, - 51765623710, - 50935004514 - ], - "samples_ts": [ - 2.46013, - 2.47268, - 2.51301 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T19:55:07Z", - "avg_ns": 46865923492, - "stddev_ns": 1224684492, - "avg_ts": 2.739767, - "stddev_ts": 0.184018, - "samples_ns": [ - 45022675682, - 44928342564, - 50646752230 - ], - "samples_ts": [ - 2.84301, - 2.84898, - 2.52731 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 592 - }, - { - "timestamp_utc": "2025-12-10T20:12:16.122233+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:57:30Z\",\n \"avg_ns\": 44382629090,\n \"stddev_ns\": 4087784597,\n \"avg_ts\": 3.103966,\n \"stddev_ts\": 1.111681,\n \"samples_ns\": [ 52078747787, 51896111243, 29173028240 ],\n \"samples_ts\": [ 2.45782, 2.46647, 4.38761 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:00:35Z\",\n \"avg_ns\": 233362952181,\n \"stddev_ns\": 3655515556,\n \"avg_ts\": 2.198681,\n \"stddev_ts\": 0.122151,\n \"samples_ns\": [ 248824734463, 225343171539, 225920950541 ],\n \"samples_ts\": [ 2.05767, 2.27209, 2.26628 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T19:57:30Z", - "avg_ns": 44382629090, - "stddev_ns": 4087784597, - "avg_ts": 3.103966, - "stddev_ts": 1.111681, - "samples_ns": [ - 52078747787, - 51896111243, - 29173028240 - ], - "samples_ts": [ - 2.45782, - 2.46647, - 4.38761 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T20:00:35Z", - "avg_ns": 233362952181, - "stddev_ns": 3655515556, - "avg_ts": 2.198681, - "stddev_ts": 0.122151, - "samples_ns": [ - 248824734463, - 225343171539, - 225920950541 - ], - "samples_ts": [ - 2.05767, - 2.27209, - 2.26628 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 593 - }, - { - "timestamp_utc": "2025-12-10T20:25:56.336223+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:12:17Z\",\n \"avg_ns\": 146193151665,\n \"stddev_ns\": 3589031991,\n \"avg_ts\": 3.610748,\n \"stddev_ts\": 0.723859,\n \"samples_ns\": [ 124268742022, 184030670494, 130280042481 ],\n \"samples_ts\": [ 4.1201, 2.78214, 3.93 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:22:46Z\",\n \"avg_ns\": 62966840090,\n \"stddev_ns\": 3802778440,\n \"avg_ts\": 2.169725,\n \"stddev_ts\": 0.725748,\n \"samples_ns\": [ 75972322989, 70319619631, 42608577652 ],\n \"samples_ts\": [ 1.68482, 1.82026, 3.00409 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T20:12:17Z", - "avg_ns": 146193151665, - "stddev_ns": 3589031991, - "avg_ts": 3.610748, - "stddev_ts": 0.723859, - "samples_ns": [ - 124268742022, - 184030670494, - 130280042481 - ], - "samples_ts": [ - 4.1201, - 2.78214, - 3.93 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T20:22:46Z", - "avg_ns": 62966840090, - "stddev_ns": 3802778440, - "avg_ts": 2.169725, - "stddev_ts": 0.725748, - "samples_ns": [ - 75972322989, - 70319619631, - 42608577652 - ], - "samples_ts": [ - 1.68482, - 1.82026, - 3.00409 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 594 - }, - { - "timestamp_utc": "2025-12-10T20:47:30.923060+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:25:57Z\",\n \"avg_ns\": 156905142861,\n \"stddev_ns\": 456247848,\n \"avg_ts\": 3.264743,\n \"stddev_ts\": 0.088576,\n \"samples_ns\": [ 161848838287, 153863371623, 155003218673 ],\n \"samples_ts\": [ 3.16345, 3.32763, 3.30316 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:36:15Z\",\n \"avg_ns\": 225082723009,\n \"stddev_ns\": 4287975390,\n \"avg_ts\": 2.287652,\n \"stddev_ts\": 0.207735,\n \"samples_ns\": [ 248222872258, 219920609526, 207104687244 ],\n \"samples_ts\": [ 2.06266, 2.32811, 2.47218 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T20:25:57Z", - "avg_ns": 156905142861, - "stddev_ns": 456247848, - "avg_ts": 3.264743, - "stddev_ts": 0.088576, - "samples_ns": [ - 161848838287, - 153863371623, - 155003218673 - ], - "samples_ts": [ - 3.16345, - 3.32763, - 3.30316 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T20:36:15Z", - "avg_ns": 225082723009, - "stddev_ns": 4287975390, - "avg_ts": 2.287652, - "stddev_ts": 0.207735, - "samples_ns": [ - 248222872258, - 219920609526, - 207104687244 - ], - "samples_ts": [ - 2.06266, - 2.32811, - 2.47218 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 595 - }, - { - "timestamp_utc": "2025-12-10T20:53:20.631938+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:47:32Z\",\n \"avg_ns\": 44015831319,\n \"stddev_ns\": 2594725765,\n \"avg_ts\": 3.118851,\n \"stddev_ts\": 1.088440,\n \"samples_ns\": [ 51682043678, 51112126653, 29253323627 ],\n \"samples_ts\": [ 2.47668, 2.5043, 4.37557 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:50:36Z\",\n \"avg_ns\": 54585728643,\n \"stddev_ns\": 1297190884,\n \"avg_ts\": 2.471782,\n \"stddev_ts\": 0.633060,\n \"samples_ns\": [ 45092966712, 45134359917, 73529859302 ],\n \"samples_ts\": [ 2.83858, 2.83598, 1.74079 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T20:47:32Z", - "avg_ns": 44015831319, - "stddev_ns": 2594725765, - "avg_ts": 3.118851, - "stddev_ts": 1.08844, - "samples_ns": [ - 51682043678, - 51112126653, - 29253323627 - ], - "samples_ts": [ - 2.47668, - 2.5043, - 4.37557 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T20:50:36Z", - "avg_ns": 54585728643, - "stddev_ns": 1297190884, - "avg_ts": 2.471782, - "stddev_ts": 0.63306, - "samples_ns": [ - 45092966712, - 45134359917, - 73529859302 - ], - "samples_ts": [ - 2.83858, - 2.83598, - 1.74079 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 596 - }, - { - "timestamp_utc": "2025-12-10T21:08:11.444036+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:53:22Z\",\n \"avg_ns\": 36878159235,\n \"stddev_ns\": 2276181995,\n \"avg_ts\": 3.730752,\n \"stddev_ts\": 1.098630,\n \"samples_ns\": [ 51986716276, 29305737263, 29342024168 ],\n \"samples_ts\": [ 2.46217, 4.36775, 4.36234 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:56:04Z\",\n \"avg_ns\": 242073142030,\n \"stddev_ns\": 1277533721,\n \"avg_ts\": 2.121276,\n \"stddev_ts\": 0.140028,\n \"samples_ns\": [ 259012316803, 240288222021, 226918887266 ],\n \"samples_ts\": [ 1.97674, 2.13077, 2.25631 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T20:53:22Z", - "avg_ns": 36878159235, - "stddev_ns": 2276181995, - "avg_ts": 3.730752, - "stddev_ts": 1.09863, - "samples_ns": [ - 51986716276, - 29305737263, - 29342024168 - ], - "samples_ts": [ - 2.46217, - 4.36775, - 4.36234 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T20:56:04Z", - "avg_ns": 242073142030, - "stddev_ns": 1277533721, - "avg_ts": 2.121276, - "stddev_ts": 0.140028, - "samples_ns": [ - 259012316803, - 240288222021, - 226918887266 - ], - "samples_ts": [ - 1.97674, - 2.13077, - 2.25631 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 597 - }, - { - "timestamp_utc": "2025-12-10T21:21:39.457648+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:08:12Z\",\n \"avg_ns\": 156394030835,\n \"stddev_ns\": 3845706789,\n \"avg_ts\": 3.275966,\n \"stddev_ts\": 0.104528,\n \"samples_ns\": [ 150738812853, 159385086363, 159058193290 ],\n \"samples_ts\": [ 3.3966, 3.21235, 3.21895 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:18:49Z\",\n \"avg_ns\": 56256039890,\n \"stddev_ns\": 1438092158,\n \"avg_ts\": 2.434151,\n \"stddev_ts\": 0.695875,\n \"samples_ns\": [ 78497150178, 45083040941, 45187928552 ],\n \"samples_ts\": [ 1.63063, 2.83921, 2.83261 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T21:08:12Z", - "avg_ns": 156394030835, - "stddev_ns": 3845706789, - "avg_ts": 3.275966, - "stddev_ts": 0.104528, - "samples_ns": [ - 150738812853, - 159385086363, - 159058193290 - ], - "samples_ts": [ - 3.3966, - 3.21235, - 3.21895 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T21:18:49Z", - "avg_ns": 56256039890, - "stddev_ns": 1438092158, - "avg_ts": 2.434151, - "stddev_ts": 0.695875, - "samples_ns": [ - 78497150178, - 45083040941, - 45187928552 - ], - "samples_ts": [ - 1.63063, - 2.83921, - 2.83261 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 598 - }, - { - "timestamp_utc": "2025-12-10T21:44:51.855137+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:21:40Z\",\n \"avg_ns\": 146292042188,\n \"stddev_ns\": 4200805683,\n \"avg_ts\": 3.644029,\n \"stddev_ts\": 0.832975,\n \"samples_ns\": [ 128603602431, 189958794403, 120313729730 ],\n \"samples_ts\": [ 3.98123, 2.69532, 4.25554 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:32:01Z\",\n \"avg_ns\": 256703265651,\n \"stddev_ns\": 4072998301,\n \"avg_ts\": 1.994859,\n \"stddev_ts\": 0.031929,\n \"samples_ns\": [ 259481899536, 258600087851, 252027809566 ],\n \"samples_ts\": [ 1.97316, 1.97989, 2.03152 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T21:21:40Z", - "avg_ns": 146292042188, - "stddev_ns": 4200805683, - "avg_ts": 3.644029, - "stddev_ts": 0.832975, - "samples_ns": [ - 128603602431, - 189958794403, - 120313729730 - ], - "samples_ts": [ - 3.98123, - 2.69532, - 4.25554 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T21:32:01Z", - "avg_ns": 256703265651, - "stddev_ns": 4072998301, - "avg_ts": 1.994859, - "stddev_ts": 0.031929, - "samples_ns": [ - 259481899536, - 258600087851, - 252027809566 - ], - "samples_ts": [ - 1.97316, - 1.97989, - 2.03152 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 599 - }, - { - "timestamp_utc": "2025-12-10T21:50:19.618254+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:44:53Z\",\n \"avg_ns\": 29268764578,\n \"stddev_ns\": 8699711,\n \"avg_ts\": 4.373263,\n \"stddev_ts\": 0.001300,\n \"samples_ns\": [ 29277275106, 29269131345, 29259887283 ],\n \"samples_ts\": [ 4.37199, 4.37321, 4.37459 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:46:56Z\",\n \"avg_ns\": 67631260405,\n \"stddev_ns\": 3723269429,\n \"avg_ts\": 1.908893,\n \"stddev_ts\": 0.213613,\n \"samples_ns\": [ 60645345581, 75959229811, 66289205823 ],\n \"samples_ts\": [ 2.11063, 1.68511, 1.93093 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T21:44:53Z", - "avg_ns": 29268764578, - "stddev_ns": 8699711, - "avg_ts": 4.373263, - "stddev_ts": 0.0013, - "samples_ns": [ - 29277275106, - 29269131345, - 29259887283 - ], - "samples_ts": [ - 4.37199, - 4.37321, - 4.37459 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T21:46:56Z", - "avg_ns": 67631260405, - "stddev_ns": 3723269429, - "avg_ts": 1.908893, - "stddev_ts": 0.213613, - "samples_ns": [ - 60645345581, - 75959229811, - 66289205823 - ], - "samples_ts": [ - 2.11063, - 1.68511, - 1.93093 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 600 - }, - { - "timestamp_utc": "2025-12-10T22:05:11.831967+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:50:20Z\",\n \"avg_ns\": 29638684699,\n \"stddev_ns\": 696280740,\n \"avg_ts\": 4.320248,\n \"stddev_ts\": 0.100134,\n \"samples_ns\": [ 29237882174, 29235492662, 30442679261 ],\n \"samples_ts\": [ 4.37788, 4.37824, 4.20462 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:52:18Z\",\n \"avg_ns\": 257462759525,\n \"stddev_ns\": 533303273,\n \"avg_ts\": 1.988643,\n \"stddev_ts\": 0.004124,\n \"samples_ns\": [ 256847652662, 257795705557, 257744920357 ],\n \"samples_ts\": [ 1.9934, 1.98607, 1.98646 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T21:50:20Z", - "avg_ns": 29638684699, - "stddev_ns": 696280740, - "avg_ts": 4.320248, - "stddev_ts": 0.100134, - "samples_ns": [ - 29237882174, - 29235492662, - 30442679261 - ], - "samples_ts": [ - 4.37788, - 4.37824, - 4.20462 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T21:52:18Z", - "avg_ns": 257462759525, - "stddev_ns": 533303273, - "avg_ts": 1.988643, - "stddev_ts": 0.004124, - "samples_ns": [ - 256847652662, - 257795705557, - 257744920357 - ], - "samples_ts": [ - 1.9934, - 1.98607, - 1.98646 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 601 - }, - { - "timestamp_utc": "2025-12-10T22:17:54.170965+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:05:12Z\",\n \"avg_ns\": 166730291752,\n \"stddev_ns\": 3841825596,\n \"avg_ts\": 3.225327,\n \"stddev_ts\": 0.933934,\n \"samples_ns\": [ 191644088242, 118969893156, 189576893858 ],\n \"samples_ts\": [ 2.67162, 4.30361, 2.70075 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:15:29Z\",\n \"avg_ns\": 47946206740,\n \"stddev_ns\": 3309664874,\n \"avg_ts\": 2.736432,\n \"stddev_ts\": 0.495415,\n \"samples_ns\": [ 42387149409, 42312240822, 59139229989 ],\n \"samples_ts\": [ 3.01978, 3.02513, 2.16438 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T22:05:12Z", - "avg_ns": 166730291752, - "stddev_ns": 3841825596, - "avg_ts": 3.225327, - "stddev_ts": 0.933934, - "samples_ns": [ - 191644088242, - 118969893156, - 189576893858 - ], - "samples_ts": [ - 2.67162, - 4.30361, - 2.70075 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T22:15:29Z", - "avg_ns": 47946206740, - "stddev_ns": 3309664874, - "avg_ts": 2.736432, - "stddev_ts": 0.495415, - "samples_ns": [ - 42387149409, - 42312240822, - 59139229989 - ], - "samples_ts": [ - 3.01978, - 3.02513, - 2.16438 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 602 - }, - { - "timestamp_utc": "2025-12-10T22:41:01.038697+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:17:55Z\",\n \"avg_ns\": 151466350449,\n \"stddev_ns\": 2300202971,\n \"avg_ts\": 3.400024,\n \"stddev_ts\": 0.310935,\n \"samples_ns\": [ 140196441990, 167728956859, 146473652499 ],\n \"samples_ts\": [ 3.65202, 3.05254, 3.49551 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:28:23Z\",\n \"avg_ns\": 252139089787,\n \"stddev_ns\": 2434399118,\n \"avg_ts\": 2.037113,\n \"stddev_ts\": 0.143633,\n \"samples_ns\": [ 232417082300, 261635885542, 262364301520 ],\n \"samples_ts\": [ 2.20294, 1.95692, 1.95149 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T22:17:55Z", - "avg_ns": 151466350449, - "stddev_ns": 2300202971, - "avg_ts": 3.400024, - "stddev_ts": 0.310935, - "samples_ns": [ - 140196441990, - 167728956859, - 146473652499 - ], - "samples_ts": [ - 3.65202, - 3.05254, - 3.49551 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T22:28:23Z", - "avg_ns": 252139089787, - "stddev_ns": 2434399118, - "avg_ts": 2.037113, - "stddev_ts": 0.143633, - "samples_ns": [ - 232417082300, - 261635885542, - 262364301520 - ], - "samples_ts": [ - 2.20294, - 1.95692, - 1.95149 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 603 - }, - { - "timestamp_utc": "2025-12-10T22:46:22.632822+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:41:01Z\",\n \"avg_ns\": 40292332239,\n \"stddev_ns\": 2679153091,\n \"avg_ts\": 3.353264,\n \"stddev_ts\": 0.958728,\n \"samples_ns\": [ 29266396821, 39813367131, 51797232765 ],\n \"samples_ts\": [ 4.37362, 3.215, 2.47117 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:43:31Z\",\n \"avg_ns\": 56653175669,\n \"stddev_ns\": 3763480566,\n \"avg_ts\": 2.396228,\n \"stddev_ts\": 0.666949,\n \"samples_ns\": [ 76444083472, 50701282001, 42814161536 ],\n \"samples_ts\": [ 1.67443, 2.52459, 2.98966 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T22:41:01Z", - "avg_ns": 40292332239, - "stddev_ns": 2679153091, - "avg_ts": 3.353264, - "stddev_ts": 0.958728, - "samples_ns": [ - 29266396821, - 39813367131, - 51797232765 - ], - "samples_ts": [ - 4.37362, - 3.215, - 2.47117 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T22:43:31Z", - "avg_ns": 56653175669, - "stddev_ns": 3763480566, - "avg_ts": 2.396228, - "stddev_ts": 0.666949, - "samples_ns": [ - 76444083472, - 50701282001, - 42814161536 - ], - "samples_ts": [ - 1.67443, - 2.52459, - 2.98966 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 604 - }, - { - "timestamp_utc": "2025-12-10T23:01:15.988097+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:46:23Z\",\n \"avg_ns\": 45734365955,\n \"stddev_ns\": 2374029756,\n \"avg_ts\": 2.913136,\n \"stddev_ts\": 0.758150,\n \"samples_ns\": [ 33786183026, 51862740315, 51554174524 ],\n \"samples_ts\": [ 3.78853, 2.46805, 2.48283 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:49:10Z\",\n \"avg_ns\": 241745837397,\n \"stddev_ns\": 1914163138,\n \"avg_ts\": 2.124012,\n \"stddev_ts\": 0.138914,\n \"samples_ns\": [ 226494542375, 240525945601, 258217024215 ],\n \"samples_ts\": [ 2.26054, 2.12867, 1.98283 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T22:46:23Z", - "avg_ns": 45734365955, - "stddev_ns": 2374029756, - "avg_ts": 2.913136, - "stddev_ts": 0.75815, - "samples_ns": [ - 33786183026, - 51862740315, - 51554174524 - ], - "samples_ts": [ - 3.78853, - 2.46805, - 2.48283 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T22:49:10Z", - "avg_ns": 241745837397, - "stddev_ns": 1914163138, - "avg_ts": 2.124012, - "stddev_ts": 0.138914, - "samples_ns": [ - 226494542375, - 240525945601, - 258217024215 - ], - "samples_ts": [ - 2.26054, - 2.12867, - 1.98283 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 605 - }, - { - "timestamp_utc": "2025-12-10T23:15:08.915871+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:01:16Z\",\n \"avg_ns\": 153443890056,\n \"stddev_ns\": 2278706953,\n \"avg_ts\": 3.373216,\n \"stddev_ts\": 0.419651,\n \"samples_ns\": [ 147285734139, 175845373134, 137200562896 ],\n \"samples_ts\": [ 3.47624, 2.91165, 3.73176 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:11:43Z\",\n \"avg_ns\": 68480699706,\n \"stddev_ns\": 1039716377,\n \"avg_ts\": 1.932741,\n \"stddev_ts\": 0.457510,\n \"samples_ns\": [ 52020928665, 77606212249, 75814958206 ],\n \"samples_ts\": [ 2.46055, 1.64935, 1.68832 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T23:01:16Z", - "avg_ns": 153443890056, - "stddev_ns": 2278706953, - "avg_ts": 3.373216, - "stddev_ts": 0.419651, - "samples_ns": [ - 147285734139, - 175845373134, - 137200562896 - ], - "samples_ts": [ - 3.47624, - 2.91165, - 3.73176 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T23:11:43Z", - "avg_ns": 68480699706, - "stddev_ns": 1039716377, - "avg_ts": 1.932741, - "stddev_ts": 0.45751, - "samples_ns": [ - 52020928665, - 77606212249, - 75814958206 - ], - "samples_ts": [ - 2.46055, - 1.64935, - 1.68832 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 606 - }, - { - "timestamp_utc": "2025-12-10T23:37:07.777061+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:15:09Z\",\n \"avg_ns\": 166971417963,\n \"stddev_ns\": 2140886876,\n \"avg_ts\": 3.231523,\n \"stddev_ts\": 0.969024,\n \"samples_ns\": [ 191702626459, 117688836826, 191522790604 ],\n \"samples_ts\": [ 2.6708, 4.35046, 2.67331 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:25:28Z\",\n \"avg_ns\": 233007694525,\n \"stddev_ns\": 1317869555,\n \"avg_ts\": 2.199129,\n \"stddev_ts\": 0.076294,\n \"samples_ns\": [ 241699038079, 225556254827, 231767790671 ],\n \"samples_ts\": [ 2.11834, 2.26994, 2.20911 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T23:15:09Z", - "avg_ns": 166971417963, - "stddev_ns": 2140886876, - "avg_ts": 3.231523, - "stddev_ts": 0.969024, - "samples_ns": [ - 191702626459, - 117688836826, - 191522790604 - ], - "samples_ts": [ - 2.6708, - 4.35046, - 2.67331 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T23:25:28Z", - "avg_ns": 233007694525, - "stddev_ns": 1317869555, - "avg_ts": 2.199129, - "stddev_ts": 0.076294, - "samples_ns": [ - 241699038079, - 225556254827, - 231767790671 - ], - "samples_ts": [ - 2.11834, - 2.26994, - 2.20911 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 607 - }, - { - "timestamp_utc": "2025-12-10T23:42:39.534024+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:37:08Z\",\n \"avg_ns\": 48450662681,\n \"stddev_ns\": 4077821356,\n \"avg_ts\": 2.670484,\n \"stddev_ts\": 0.351217,\n \"samples_ns\": [ 51887711487, 51852235218, 41612041339 ],\n \"samples_ts\": [ 2.46687, 2.46855, 3.07603 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:40:19Z\",\n \"avg_ns\": 46354731181,\n \"stddev_ns\": 4121836556,\n \"avg_ts\": 2.789871,\n \"stddev_ts\": 0.333547,\n \"samples_ns\": [ 42885197922, 42950622954, 53228372669 ],\n \"samples_ts\": [ 2.98471, 2.98017, 2.40473 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T23:37:08Z", - "avg_ns": 48450662681, - "stddev_ns": 4077821356, - "avg_ts": 2.670484, - "stddev_ts": 0.351217, - "samples_ns": [ - 51887711487, - 51852235218, - 41612041339 - ], - "samples_ts": [ - 2.46687, - 2.46855, - 3.07603 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-10T23:40:19Z", - "avg_ns": 46354731181, - "stddev_ns": 4121836556, - "avg_ts": 2.789871, - "stddev_ts": 0.333547, - "samples_ns": [ - 42885197922, - 42950622954, - 53228372669 - ], - "samples_ts": [ - 2.98471, - 2.98017, - 2.40473 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 608 - }, - { - "timestamp_utc": "2025-12-10T23:57:26.366842+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:42:40Z\",\n \"avg_ns\": 42915060883,\n \"stddev_ns\": 3917026436,\n \"avg_ts\": 3.177057,\n \"stddev_ts\": 1.043922,\n \"samples_ns\": [ 51856360223, 47637353026, 29251469401 ],\n \"samples_ts\": [ 2.46836, 2.68697, 4.37585 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:45:41Z\",\n \"avg_ns\": 234820175314,\n \"stddev_ns\": 4280831922,\n \"avg_ts\": 2.186727,\n \"stddev_ts\": 0.141444,\n \"samples_ns\": [ 253034039252, 226031351429, 225395135263 ],\n \"samples_ts\": [ 2.02344, 2.26517, 2.27157 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T23:42:40Z", - "avg_ns": 42915060883, - "stddev_ns": 3917026436, - "avg_ts": 3.177057, - "stddev_ts": 1.043922, - "samples_ns": [ - 51856360223, - 47637353026, - 29251469401 - ], - "samples_ts": [ - 2.46836, - 2.68697, - 4.37585 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-10T23:45:41Z", - "avg_ns": 234820175314, - "stddev_ns": 4280831922, - "avg_ts": 2.186727, - "stddev_ts": 0.141444, - "samples_ns": [ - 253034039252, - 226031351429, - 225395135263 - ], - "samples_ts": [ - 2.02344, - 2.26517, - 2.27157 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 609 - }, - { - "timestamp_utc": "2025-12-11T00:11:08.964789+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:57:27Z\",\n \"avg_ns\": 149695354188,\n \"stddev_ns\": 2367838445,\n \"avg_ts\": 3.483364,\n \"stddev_ts\": 0.551906,\n \"samples_ns\": [ 130863439751, 178972965979, 139249656835 ],\n \"samples_ts\": [ 3.91248, 2.86077, 3.67685 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:08:04Z\",\n \"avg_ns\": 61261781330,\n \"stddev_ns\": 534327089,\n \"avg_ts\": 2.209763,\n \"stddev_ts\": 0.660813,\n \"samples_ns\": [ 77192849859, 63087101483, 43505392648 ],\n \"samples_ts\": [ 1.65818, 2.02894, 2.94216 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-10T23:57:27Z", - "avg_ns": 149695354188, - "stddev_ns": 2367838445, - "avg_ts": 3.483364, - "stddev_ts": 0.551906, - "samples_ns": [ - 130863439751, - 178972965979, - 139249656835 - ], - "samples_ts": [ - 3.91248, - 2.86077, - 3.67685 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T00:08:04Z", - "avg_ns": 61261781330, - "stddev_ns": 534327089, - "avg_ts": 2.209763, - "stddev_ts": 0.660813, - "samples_ns": [ - 77192849859, - 63087101483, - 43505392648 - ], - "samples_ts": [ - 1.65818, - 2.02894, - 2.94216 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 610 - }, - { - "timestamp_utc": "2025-12-11T00:34:07.028048+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:11:09Z\",\n \"avg_ns\": 154855673799,\n \"stddev_ns\": 4113881790,\n \"avg_ts\": 3.322036,\n \"stddev_ts\": 0.277531,\n \"samples_ns\": [ 152530539855, 169043856374, 142992625170 ],\n \"samples_ts\": [ 3.3567, 3.0288, 3.5806 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:21:33Z\",\n \"avg_ns\": 251164373508,\n \"stddev_ns\": 983914033,\n \"avg_ts\": 2.043477,\n \"stddev_ts\": 0.125603,\n \"samples_ns\": [ 259323210346, 260215227423, 233954682756 ],\n \"samples_ts\": [ 1.97437, 1.9676, 2.18846 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T00:11:09Z", - "avg_ns": 154855673799, - "stddev_ns": 4113881790, - "avg_ts": 3.322036, - "stddev_ts": 0.277531, - "samples_ns": [ - 152530539855, - 169043856374, - 142992625170 - ], - "samples_ts": [ - 3.3567, - 3.0288, - 3.5806 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T00:21:33Z", - "avg_ns": 251164373508, - "stddev_ns": 983914033, - "avg_ts": 2.043477, - "stddev_ts": 0.125603, - "samples_ns": [ - 259323210346, - 260215227423, - 233954682756 - ], - "samples_ts": [ - 1.97437, - 1.9676, - 2.18846 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 611 - }, - { - "timestamp_utc": "2025-12-11T00:39:18.395516+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:34:08Z\",\n \"avg_ns\": 16464739921,\n \"stddev_ns\": 219170040,\n \"avg_ts\": 7.937715,\n \"stddev_ts\": 1.326354,\n \"samples_ns\": [ 19980658291, 14718865459, 14694696014 ],\n \"samples_ts\": [ 6.4062, 8.69632, 8.71063 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:35:48Z\",\n \"avg_ns\": 69732962297,\n \"stddev_ns\": 3843950777,\n \"avg_ts\": 2.502547,\n \"stddev_ts\": 1.897251,\n \"samples_ns\": [ 27272906788, 90812433184, 91113546921 ],\n \"samples_ts\": [ 4.6933, 1.4095, 1.40484 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T00:34:08Z", - "avg_ns": 16464739921, - "stddev_ns": 219170040, - "avg_ts": 7.937715, - "stddev_ts": 1.326354, - "samples_ns": [ - 19980658291, - 14718865459, - 14694696014 - ], - "samples_ts": [ - 6.4062, - 8.69632, - 8.71063 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T00:35:48Z", - "avg_ns": 69732962297, - "stddev_ns": 3843950777, - "avg_ts": 2.502547, - "stddev_ts": 1.897251, - "samples_ns": [ - 27272906788, - 90812433184, - 91113546921 - ], - "samples_ts": [ - 4.6933, - 1.4095, - 1.40484 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 612 - }, - { - "timestamp_utc": "2025-12-11T00:51:21.442589+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:39:19Z\",\n \"avg_ns\": 14736955161,\n \"stddev_ns\": 45936907,\n \"avg_ts\": 8.685704,\n \"stddev_ts\": 0.027031,\n \"samples_ns\": [ 14703342454, 14718225070, 14789297959 ],\n \"samples_ts\": [ 8.7055, 8.6967, 8.65491 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:40:52Z\",\n \"avg_ns\": 209635713941,\n \"stddev_ns\": 2097757675,\n \"avg_ts\": 2.532357,\n \"stddev_ts\": 0.568652,\n \"samples_ns\": [ 264909242968, 196891414736, 167106484119 ],\n \"samples_ts\": [ 1.93274, 2.60042, 3.06391 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T00:39:19Z", - "avg_ns": 14736955161, - "stddev_ns": 45936907, - "avg_ts": 8.685704, - "stddev_ts": 0.027031, - "samples_ns": [ - 14703342454, - 14718225070, - 14789297959 - ], - "samples_ts": [ - 8.7055, - 8.6967, - 8.65491 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T00:40:52Z", - "avg_ns": 209635713941, - "stddev_ns": 2097757675, - "avg_ts": 2.532357, - "stddev_ts": 0.568652, - "samples_ns": [ - 264909242968, - 196891414736, - 167106484119 - ], - "samples_ts": [ - 1.93274, - 2.60042, - 3.06391 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 613 - }, - { - "timestamp_utc": "2025-12-11T01:04:06.339618+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:51:23Z\",\n \"avg_ns\": 119362696041,\n \"stddev_ns\": 2857858724,\n \"avg_ts\": 4.865382,\n \"stddev_ts\": 2.048860,\n \"samples_ns\": [ 73117982937, 175473512720, 109496592467 ],\n \"samples_ts\": [ 7.00238, 2.91782, 4.67594 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:00:45Z\",\n \"avg_ns\": 66869353145,\n \"stddev_ns\": 2321121207,\n \"avg_ts\": 2.684874,\n \"stddev_ts\": 2.122672,\n \"samples_ns\": [ 24927669697, 84068647581, 91611742159 ],\n \"samples_ts\": [ 5.13486, 1.52257, 1.3972 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T00:51:23Z", - "avg_ns": 119362696041, - "stddev_ns": 2857858724, - "avg_ts": 4.865382, - "stddev_ts": 2.04886, - "samples_ns": [ - 73117982937, - 175473512720, - 109496592467 - ], - "samples_ts": [ - 7.00238, - 2.91782, - 4.67594 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T01:00:45Z", - "avg_ns": 66869353145, - "stddev_ns": 2321121207, - "avg_ts": 2.684874, - "stddev_ts": 2.122672, - "samples_ns": [ - 24927669697, - 84068647581, - 91611742159 - ], - "samples_ts": [ - 5.13486, - 1.52257, - 1.3972 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 614 - }, - { - "timestamp_utc": "2025-12-11T01:24:54.327806+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:04:07Z\",\n \"avg_ns\": 131895545198,\n \"stddev_ns\": 919386378,\n \"avg_ts\": 3.939954,\n \"stddev_ts\": 0.606200,\n \"samples_ns\": [ 147310484601, 137691464443, 110684686551 ],\n \"samples_ts\": [ 3.47565, 3.71846, 4.62575 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:12:24Z\",\n \"avg_ns\": 249563575523,\n \"stddev_ns\": 3413817620,\n \"avg_ts\": 2.071453,\n \"stddev_ts\": 0.256504,\n \"samples_ns\": [ 216468730885, 270617224407, 261604771277 ],\n \"samples_ts\": [ 2.36524, 1.89197, 1.95715 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T01:04:07Z", - "avg_ns": 131895545198, - "stddev_ns": 919386378, - "avg_ts": 3.939954, - "stddev_ts": 0.6062, - "samples_ns": [ - 147310484601, - 137691464443, - 110684686551 - ], - "samples_ts": [ - 3.47565, - 3.71846, - 4.62575 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T01:12:24Z", - "avg_ns": 249563575523, - "stddev_ns": 3413817620, - "avg_ts": 2.071453, - "stddev_ts": 0.256504, - "samples_ns": [ - 216468730885, - 270617224407, - 261604771277 - ], - "samples_ts": [ - 2.36524, - 1.89197, - 1.95715 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 615 - }, - { - "timestamp_utc": "2025-12-11T01:30:06.808809+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:24:55Z\",\n \"avg_ns\": 14723585338,\n \"stddev_ns\": 30530825,\n \"avg_ts\": 8.693560,\n \"stddev_ts\": 0.018041,\n \"samples_ns\": [ 14749022313, 14732005019, 14689728684 ],\n \"samples_ts\": [ 8.67854, 8.68857, 8.71357 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:26:02Z\",\n \"avg_ns\": 81284049515,\n \"stddev_ns\": 3945289792,\n \"avg_ts\": 1.614488,\n \"stddev_ts\": 0.328001,\n \"samples_ns\": [ 64217916318, 90016664466, 89617567762 ],\n \"samples_ts\": [ 1.99321, 1.42196, 1.42829 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T01:24:55Z", - "avg_ns": 14723585338, - "stddev_ns": 30530825, - "avg_ts": 8.69356, - "stddev_ts": 0.018041, - "samples_ns": [ - 14749022313, - 14732005019, - 14689728684 - ], - "samples_ts": [ - 8.67854, - 8.68857, - 8.71357 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T01:26:02Z", - "avg_ns": 81284049515, - "stddev_ns": 3945289792, - "avg_ts": 1.614488, - "stddev_ts": 0.328001, - "samples_ns": [ - 64217916318, - 90016664466, - 89617567762 - ], - "samples_ts": [ - 1.99321, - 1.42196, - 1.42829 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 616 - }, - { - "timestamp_utc": "2025-12-11T01:42:16.001756+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:30:07Z\",\n \"avg_ns\": 14723689268,\n \"stddev_ns\": 17979659,\n \"avg_ts\": 8.693482,\n \"stddev_ts\": 0.010608,\n \"samples_ns\": [ 14712414619, 14744422964, 14714230223 ],\n \"samples_ts\": [ 8.70014, 8.68125, 8.69906 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:31:06Z\",\n \"avg_ns\": 223004230552,\n \"stddev_ns\": 1430551399,\n \"avg_ts\": 2.388407,\n \"stddev_ts\": 0.608742,\n \"samples_ns\": [ 265106558287, 237640359813, 166265773558 ],\n \"samples_ts\": [ 1.9313, 2.15452, 3.07941 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T01:30:07Z", - "avg_ns": 14723689268, - "stddev_ns": 17979659, - "avg_ts": 8.693482, - "stddev_ts": 0.010608, - "samples_ns": [ - 14712414619, - 14744422964, - 14714230223 - ], - "samples_ts": [ - 8.70014, - 8.68125, - 8.69906 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T01:31:06Z", - "avg_ns": 223004230552, - "stddev_ns": 1430551399, - "avg_ts": 2.388407, - "stddev_ts": 0.608742, - "samples_ns": [ - 265106558287, - 237640359813, - 166265773558 - ], - "samples_ts": [ - 1.9313, - 2.15452, - 3.07941 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 617 - }, - { - "timestamp_utc": "2025-12-11T01:54:54.249764+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:42:17Z\",\n \"avg_ns\": 114381696280,\n \"stddev_ns\": 4086253766,\n \"avg_ts\": 5.683132,\n \"stddev_ts\": 2.755106,\n \"samples_ns\": [ 66612690190, 201480988412, 75051410240 ],\n \"samples_ts\": [ 7.68622, 2.54118, 6.82199 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:50:59Z\",\n \"avg_ns\": 78149256775,\n \"stddev_ns\": 1697208764,\n \"avg_ts\": 1.726206,\n \"stddev_ts\": 0.517964,\n \"samples_ns\": [ 55070452393, 89798687237, 89578630697 ],\n \"samples_ts\": [ 2.3243, 1.42541, 1.42891 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T01:42:17Z", - "avg_ns": 114381696280, - "stddev_ns": 4086253766, - "avg_ts": 5.683132, - "stddev_ts": 2.755106, - "samples_ns": [ - 66612690190, - 201480988412, - 75051410240 - ], - "samples_ts": [ - 7.68622, - 2.54118, - 6.82199 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T01:50:59Z", - "avg_ns": 78149256775, - "stddev_ns": 1697208764, - "avg_ts": 1.726206, - "stddev_ts": 0.517964, - "samples_ns": [ - 55070452393, - 89798687237, - 89578630697 - ], - "samples_ts": [ - 2.3243, - 1.42541, - 1.42891 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 618 - }, - { - "timestamp_utc": "2025-12-11T02:15:10.421049+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:54:55Z\",\n \"avg_ns\": 141542091146,\n \"stddev_ns\": 4235084661,\n \"avg_ts\": 3.791607,\n \"stddev_ts\": 1.013341,\n \"samples_ns\": [ 178969764188, 105110319433, 140546189817 ],\n \"samples_ts\": [ 2.86082, 4.87107, 3.64293 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:03:07Z\",\n \"avg_ns\": 240704846051,\n \"stddev_ns\": 1875493850,\n \"avg_ts\": 2.191489,\n \"stddev_ts\": 0.488798,\n \"samples_ns\": [ 185788547135, 266982440144, 269343550875 ],\n \"samples_ts\": [ 2.75582, 1.91773, 1.90092 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T01:54:55Z", - "avg_ns": 141542091146, - "stddev_ns": 4235084661, - "avg_ts": 3.791607, - "stddev_ts": 1.013341, - "samples_ns": [ - 178969764188, - 105110319433, - 140546189817 - ], - "samples_ts": [ - 2.86082, - 4.87107, - 3.64293 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T02:03:07Z", - "avg_ns": 240704846051, - "stddev_ns": 1875493850, - "avg_ts": 2.191489, - "stddev_ts": 0.488798, - "samples_ns": [ - 185788547135, - 266982440144, - 269343550875 - ], - "samples_ts": [ - 2.75582, - 1.91773, - 1.90092 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 619 - }, - { - "timestamp_utc": "2025-12-11T02:20:14.706080+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:15:11Z\",\n \"avg_ns\": 14991751883,\n \"stddev_ns\": 513300969,\n \"avg_ts\": 8.544577,\n \"stddev_ts\": 0.286886,\n \"samples_ns\": [ 14693616370, 14697182083, 15584457197 ],\n \"samples_ts\": [ 8.71127, 8.70915, 8.21331 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:16:10Z\",\n \"avg_ns\": 81128844281,\n \"stddev_ns\": 1587843136,\n \"avg_ts\": 1.597029,\n \"stddev_ts\": 0.220999,\n \"samples_ns\": [ 83880374687, 90122433956, 69383724200 ],\n \"samples_ts\": [ 1.52598, 1.42029, 1.84481 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T02:15:11Z", - "avg_ns": 14991751883, - "stddev_ns": 513300969, - "avg_ts": 8.544577, - "stddev_ts": 0.286886, - "samples_ns": [ - 14693616370, - 14697182083, - 15584457197 - ], - "samples_ts": [ - 8.71127, - 8.70915, - 8.21331 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T02:16:10Z", - "avg_ns": 81128844281, - "stddev_ns": 1587843136, - "avg_ts": 1.597029, - "stddev_ts": 0.220999, - "samples_ns": [ - 83880374687, - 90122433956, - 69383724200 - ], - "samples_ts": [ - 1.52598, - 1.42029, - 1.84481 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 620 - }, - { - "timestamp_utc": "2025-12-11T02:32:46.508316+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:20:15Z\",\n \"avg_ns\": 16766090024,\n \"stddev_ns\": 3468245846,\n \"avg_ts\": 7.834104,\n \"stddev_ts\": 1.447690,\n \"samples_ns\": [ 14748958802, 14778472150, 20770839120 ],\n \"samples_ts\": [ 8.67858, 8.66125, 6.16249 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:21:20Z\",\n \"avg_ns\": 228303093671,\n \"stddev_ns\": 3827415819,\n \"avg_ts\": 2.338644,\n \"stddev_ts\": 0.623407,\n \"samples_ns\": [ 258577974580, 258928459206, 167402847229 ],\n \"samples_ts\": [ 1.98006, 1.97738, 3.05849 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T02:20:15Z", - "avg_ns": 16766090024, - "stddev_ns": 3468245846, - "avg_ts": 7.834104, - "stddev_ts": 1.44769, - "samples_ns": [ - 14748958802, - 14778472150, - 20770839120 - ], - "samples_ts": [ - 8.67858, - 8.66125, - 6.16249 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T02:21:20Z", - "avg_ns": 228303093671, - "stddev_ns": 3827415819, - "avg_ts": 2.338644, - "stddev_ts": 0.623407, - "samples_ns": [ - 258577974580, - 258928459206, - 167402847229 - ], - "samples_ts": [ - 1.98006, - 1.97738, - 3.05849 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 621 - }, - { - "timestamp_utc": "2025-12-11T02:45:14.988320+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:32:48Z\",\n \"avg_ns\": 114827209997,\n \"stddev_ns\": 4153298739,\n \"avg_ts\": 5.616477,\n \"stddev_ts\": 2.947104,\n \"samples_ns\": [ 89971785527, 194456707565, 60053136901 ],\n \"samples_ts\": [ 5.69067, 2.63298, 8.52578 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:41:10Z\",\n \"avg_ns\": 81487742487,\n \"stddev_ns\": 4264635006,\n \"avg_ts\": 1.593918,\n \"stddev_ts\": 0.241182,\n \"samples_ns\": [ 83708370715, 91970208709, 68784648037 ],\n \"samples_ts\": [ 1.52912, 1.39176, 1.86088 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T02:32:48Z", - "avg_ns": 114827209997, - "stddev_ns": 4153298739, - "avg_ts": 5.616477, - "stddev_ts": 2.947104, - "samples_ns": [ - 89971785527, - 194456707565, - 60053136901 - ], - "samples_ts": [ - 5.69067, - 2.63298, - 8.52578 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T02:41:10Z", - "avg_ns": 81487742487, - "stddev_ns": 4264635006, - "avg_ts": 1.593918, - "stddev_ts": 0.241182, - "samples_ns": [ - 83708370715, - 91970208709, - 68784648037 - ], - "samples_ts": [ - 1.52912, - 1.39176, - 1.86088 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 622 - }, - { - "timestamp_utc": "2025-12-11T03:05:24.271059+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:45:15Z\",\n \"avg_ns\": 149869546011,\n \"stddev_ns\": 3814843656,\n \"avg_ts\": 4.098589,\n \"stddev_ts\": 2.338995,\n \"samples_ns\": [ 201971737539, 75432199784, 172204700711 ],\n \"samples_ts\": [ 2.53501, 6.78755, 2.97321 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:53:52Z\",\n \"avg_ns\": 230475340571,\n \"stddev_ns\": 3854817231,\n \"avg_ts\": 2.274775,\n \"stddev_ts\": 0.437931,\n \"samples_ns\": [ 185857054035, 235677367574, 269891600104 ],\n \"samples_ts\": [ 2.75481, 2.17246, 1.89706 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T02:45:15Z", - "avg_ns": 149869546011, - "stddev_ns": 3814843656, - "avg_ts": 4.098589, - "stddev_ts": 2.338995, - "samples_ns": [ - 201971737539, - 75432199784, - 172204700711 - ], - "samples_ts": [ - 2.53501, - 6.78755, - 2.97321 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T02:53:52Z", - "avg_ns": 230475340571, - "stddev_ns": 3854817231, - "avg_ts": 2.274775, - "stddev_ts": 0.437931, - "samples_ns": [ - 185857054035, - 235677367574, - 269891600104 - ], - "samples_ts": [ - 2.75481, - 2.17246, - 1.89706 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 623 - }, - { - "timestamp_utc": "2025-12-11T03:10:27.536075+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:05:25Z\",\n \"avg_ns\": 24342748877,\n \"stddev_ns\": 4135610344,\n \"avg_ts\": 6.653568,\n \"stddev_ts\": 3.181350,\n \"samples_ns\": [ 14732892646, 15449175496, 42846178490 ],\n \"samples_ts\": [ 8.68804, 8.28523, 2.98743 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:06:52Z\",\n \"avg_ns\": 71255341187,\n \"stddev_ns\": 3712854669,\n \"avg_ts\": 2.234255,\n \"stddev_ts\": 1.415775,\n \"samples_ns\": [ 90625213210, 90057698158, 33083112193 ],\n \"samples_ts\": [ 1.41241, 1.42131, 3.86904 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T03:05:25Z", - "avg_ns": 24342748877, - "stddev_ns": 4135610344, - "avg_ts": 6.653568, - "stddev_ts": 3.18135, - "samples_ns": [ - 14732892646, - 15449175496, - 42846178490 - ], - "samples_ts": [ - 8.68804, - 8.28523, - 2.98743 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T03:06:52Z", - "avg_ns": 71255341187, - "stddev_ns": 3712854669, - "avg_ts": 2.234255, - "stddev_ts": 1.415775, - "samples_ns": [ - 90625213210, - 90057698158, - 33083112193 - ], - "samples_ts": [ - 1.41241, - 1.42131, - 3.86904 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 624 - }, - { - "timestamp_utc": "2025-12-11T03:24:37.258501+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:10:28Z\",\n \"avg_ns\": 27146114758,\n \"stddev_ns\": 4226486993,\n \"avg_ts\": 6.204613,\n \"stddev_ts\": 3.189912,\n \"samples_ns\": [ 14783592601, 17398554033, 49256197642 ],\n \"samples_ts\": [ 8.65825, 7.35693, 2.59866 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:12:04Z\",\n \"avg_ns\": 250497810882,\n \"stddev_ns\": 4264668079,\n \"avg_ts\": 2.052303,\n \"stddev_ts\": 0.160069,\n \"samples_ns\": [ 231720932772, 270962978307, 248809521568 ],\n \"samples_ts\": [ 2.20955, 1.88956, 2.0578 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T03:10:28Z", - "avg_ns": 27146114758, - "stddev_ns": 4226486993, - "avg_ts": 6.204613, - "stddev_ts": 3.189912, - "samples_ns": [ - 14783592601, - 17398554033, - 49256197642 - ], - "samples_ts": [ - 8.65825, - 7.35693, - 2.59866 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T03:12:04Z", - "avg_ns": 250497810882, - "stddev_ns": 4264668079, - "avg_ts": 2.052303, - "stddev_ts": 0.160069, - "samples_ns": [ - 231720932772, - 270962978307, - 248809521568 - ], - "samples_ts": [ - 2.20955, - 1.88956, - 2.0578 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 625 - }, - { - "timestamp_utc": "2025-12-11T03:35:44.320801+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:24:38Z\",\n \"avg_ns\": 138392876756,\n \"stddev_ns\": 3869872694,\n \"avg_ts\": 3.788894,\n \"stddev_ts\": 0.690879,\n \"samples_ns\": [ 168388286186, 116297135616, 130493208468 ],\n \"samples_ts\": [ 3.04059, 4.40252, 3.92358 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:32:53Z\",\n \"avg_ns\": 56775917952,\n \"stddev_ns\": 1347832650,\n \"avg_ts\": 3.028189,\n \"stddev_ts\": 2.061946,\n \"samples_ns\": [ 91803882730, 54575962468, 23947908660 ],\n \"samples_ts\": [ 1.39428, 2.34535, 5.34493 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T03:24:38Z", - "avg_ns": 138392876756, - "stddev_ns": 3869872694, - "avg_ts": 3.788894, - "stddev_ts": 0.690879, - "samples_ns": [ - 168388286186, - 116297135616, - 130493208468 - ], - "samples_ts": [ - 3.04059, - 4.40252, - 3.92358 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T03:32:53Z", - "avg_ns": 56775917952, - "stddev_ns": 1347832650, - "avg_ts": 3.028189, - "stddev_ts": 2.061946, - "samples_ns": [ - 91803882730, - 54575962468, - 23947908660 - ], - "samples_ts": [ - 1.39428, - 2.34535, - 5.34493 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 626 - }, - { - "timestamp_utc": "2025-12-11T03:55:52.865811+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:35:45Z\",\n \"avg_ns\": 144015746744,\n \"stddev_ns\": 2034561447,\n \"avg_ts\": 3.793179,\n \"stddev_ts\": 1.217501,\n \"samples_ns\": [ 147411689297, 99636189461, 184999361475 ],\n \"samples_ts\": [ 3.47327, 5.1387, 2.76758 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:45:14Z\",\n \"avg_ns\": 212678704307,\n \"stddev_ns\": 2225306862,\n \"avg_ts\": 2.434308,\n \"stddev_ts\": 0.310170,\n \"samples_ns\": [ 208056451703, 187522250531, 242457410687 ],\n \"samples_ts\": [ 2.46087, 2.73034, 2.11171 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T03:35:45Z", - "avg_ns": 144015746744, - "stddev_ns": 2034561447, - "avg_ts": 3.793179, - "stddev_ts": 1.217501, - "samples_ns": [ - 147411689297, - 99636189461, - 184999361475 - ], - "samples_ts": [ - 3.47327, - 5.1387, - 2.76758 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T03:45:14Z", - "avg_ns": 212678704307, - "stddev_ns": 2225306862, - "avg_ts": 2.434308, - "stddev_ts": 0.31017, - "samples_ns": [ - 208056451703, - 187522250531, - 242457410687 - ], - "samples_ts": [ - 2.46087, - 2.73034, - 2.11171 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 627 - }, - { - "timestamp_utc": "2025-12-11T04:00:53.244612+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:55:53Z\",\n \"avg_ns\": 47258106972,\n \"stddev_ns\": 2398054782,\n \"avg_ts\": 2.737920,\n \"stddev_ts\": 0.360380,\n \"samples_ns\": [ 40582751819, 50573247112, 50618321987 ],\n \"samples_ts\": [ 3.15405, 2.53098, 2.52873 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:58:30Z\",\n \"avg_ns\": 47233029766,\n \"stddev_ns\": 3659197379,\n \"avg_ts\": 3.814116,\n \"stddev_ts\": 2.110713,\n \"samples_ns\": [ 90330979973, 27638676597, 23729432730 ],\n \"samples_ts\": [ 1.41701, 4.63119, 5.39414 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T03:55:53Z", - "avg_ns": 47258106972, - "stddev_ns": 2398054782, - "avg_ts": 2.73792, - "stddev_ts": 0.36038, - "samples_ns": [ - 40582751819, - 50573247112, - 50618321987 - ], - "samples_ts": [ - 3.15405, - 2.53098, - 2.52873 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T03:58:30Z", - "avg_ns": 47233029766, - "stddev_ns": 3659197379, - "avg_ts": 3.814116, - "stddev_ts": 2.110713, - "samples_ns": [ - 90330979973, - 27638676597, - 23729432730 - ], - "samples_ts": [ - 1.41701, - 4.63119, - 5.39414 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 628 - }, - { - "timestamp_utc": "2025-12-11T04:15:21.107059+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:00:54Z\",\n \"avg_ns\": 47745606073,\n \"stddev_ns\": 3932700465,\n \"avg_ts\": 2.701626,\n \"stddev_ts\": 0.299098,\n \"samples_ns\": [ 42009332040, 50718026835, 50509459345 ],\n \"samples_ts\": [ 3.04694, 2.52376, 2.53418 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:03:32Z\",\n \"avg_ns\": 235788531069,\n \"stddev_ns\": 4120582693,\n \"avg_ts\": 2.226508,\n \"stddev_ts\": 0.449072,\n \"samples_ns\": [ 187071846804, 249690428748, 270603317656 ],\n \"samples_ts\": [ 2.73692, 2.05054, 1.89207 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T04:00:54Z", - "avg_ns": 47745606073, - "stddev_ns": 3932700465, - "avg_ts": 2.701626, - "stddev_ts": 0.299098, - "samples_ns": [ - 42009332040, - 50718026835, - 50509459345 - ], - "samples_ts": [ - 3.04694, - 2.52376, - 2.53418 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T04:03:32Z", - "avg_ns": 235788531069, - "stddev_ns": 4120582693, - "avg_ts": 2.226508, - "stddev_ts": 0.449072, - "samples_ns": [ - 187071846804, - 249690428748, - 270603317656 - ], - "samples_ts": [ - 2.73692, - 2.05054, - 1.89207 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 629 - }, - { - "timestamp_utc": "2025-12-11T04:26:00.004639+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:15:21Z\",\n \"avg_ns\": 149375203363,\n \"stddev_ns\": 3439693801,\n \"avg_ts\": 4.555268,\n \"stddev_ts\": 3.299361,\n \"samples_ns\": [ 201715843833, 61223548283, 185186217974 ],\n \"samples_ts\": [ 2.53822, 8.3628, 2.76478 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:24:10Z\",\n \"avg_ns\": 36183719619,\n \"stddev_ns\": 3741127843,\n \"avg_ts\": 4.474155,\n \"stddev_ts\": 2.134219,\n \"samples_ns\": [ 63688830895, 22431000471, 22431327491 ],\n \"samples_ts\": [ 2.00977, 5.70639, 5.70631 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T04:15:21Z", - "avg_ns": 149375203363, - "stddev_ns": 3439693801, - "avg_ts": 4.555268, - "stddev_ts": 3.299361, - "samples_ns": [ - 201715843833, - 61223548283, - 185186217974 - ], - "samples_ts": [ - 2.53822, - 8.3628, - 2.76478 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T04:24:10Z", - "avg_ns": 36183719619, - "stddev_ns": 3741127843, - "avg_ts": 4.474155, - "stddev_ts": 2.134219, - "samples_ns": [ - 63688830895, - 22431000471, - 22431327491 - ], - "samples_ts": [ - 2.00977, - 5.70639, - 5.70631 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 630 - }, - { - "timestamp_utc": "2025-12-11T04:45:54.707239+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:26:00Z\",\n \"avg_ns\": 131290070401,\n \"stddev_ns\": 192020584,\n \"avg_ts\": 3.962508,\n \"stddev_ts\": 0.629257,\n \"samples_ns\": [ 109741325631, 135806871669, 148322013905 ],\n \"samples_ts\": [ 4.66552, 3.77006, 3.45195 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:35:29Z\",\n \"avg_ns\": 208380009492,\n \"stddev_ns\": 3574208826,\n \"avg_ts\": 2.496861,\n \"stddev_ts\": 0.403104,\n \"samples_ns\": [ 229425221094, 172929844289, 222784963094 ],\n \"samples_ts\": [ 2.23166, 2.96074, 2.29818 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T04:26:00Z", - "avg_ns": 131290070401, - "stddev_ns": 192020584, - "avg_ts": 3.962508, - "stddev_ts": 0.629257, - "samples_ns": [ - 109741325631, - 135806871669, - 148322013905 - ], - "samples_ts": [ - 4.66552, - 3.77006, - 3.45195 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T04:35:29Z", - "avg_ns": 208380009492, - "stddev_ns": 3574208826, - "avg_ts": 2.496861, - "stddev_ts": 0.403104, - "samples_ns": [ - 229425221094, - 172929844289, - 222784963094 - ], - "samples_ts": [ - 2.23166, - 2.96074, - 2.29818 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 631 - }, - { - "timestamp_utc": "2025-12-11T04:50:55.171987+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:45:55Z\",\n \"avg_ns\": 48648588122,\n \"stddev_ns\": 3437820353,\n \"avg_ts\": 2.640278,\n \"stddev_ts\": 0.194512,\n \"samples_ns\": [ 44679099714, 50664638366, 50602026287 ],\n \"samples_ts\": [ 2.86487, 2.52642, 2.52954 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:48:37Z\",\n \"avg_ns\": 45730551449,\n \"stddev_ns\": 2280776749,\n \"avg_ts\": 4.072782,\n \"stddev_ts\": 2.292019,\n \"samples_ns\": [ 89749810108, 23719286074, 23722558166 ],\n \"samples_ts\": [ 1.42619, 5.39645, 5.39571 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T04:45:55Z", - "avg_ns": 48648588122, - "stddev_ns": 3437820353, - "avg_ts": 2.640278, - "stddev_ts": 0.194512, - "samples_ns": [ - 44679099714, - 50664638366, - 50602026287 - ], - "samples_ts": [ - 2.86487, - 2.52642, - 2.52954 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T04:48:37Z", - "avg_ns": 45730551449, - "stddev_ns": 2280776749, - "avg_ts": 4.072782, - "stddev_ts": 2.292019, - "samples_ns": [ - 89749810108, - 23719286074, - 23722558166 - ], - "samples_ts": [ - 1.42619, - 5.39645, - 5.39571 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 632 - }, - { - "timestamp_utc": "2025-12-11T05:05:20.734942+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:50:55Z\",\n \"avg_ns\": 49365137653,\n \"stddev_ns\": 4088939795,\n \"avg_ts\": 2.598428,\n \"stddev_ts\": 0.148882,\n \"samples_ns\": [ 46203693644, 50942634924, 50949084392 ],\n \"samples_ts\": [ 2.77034, 2.51263, 2.51231 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:53:40Z\",\n \"avg_ns\": 233259285466,\n \"stddev_ns\": 740795531,\n \"avg_ts\": 2.252090,\n \"stddev_ts\": 0.458553,\n \"samples_ns\": [ 184833126024, 245185498177, 269759232197 ],\n \"samples_ts\": [ 2.77007, 2.08821, 1.89799 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T04:50:55Z", - "avg_ns": 49365137653, - "stddev_ns": 4088939795, - "avg_ts": 2.598428, - "stddev_ts": 0.148882, - "samples_ns": [ - 46203693644, - 50942634924, - 50949084392 - ], - "samples_ts": [ - 2.77034, - 2.51263, - 2.51231 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T04:53:40Z", - "avg_ns": 233259285466, - "stddev_ns": 740795531, - "avg_ts": 2.25209, - "stddev_ts": 0.458553, - "samples_ns": [ - 184833126024, - 245185498177, - 269759232197 - ], - "samples_ts": [ - 2.77007, - 2.08821, - 1.89799 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 633 - }, - { - "timestamp_utc": "2025-12-11T05:16:04.166818+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:05:21Z\",\n \"avg_ns\": 149818581561,\n \"stddev_ns\": 2261091159,\n \"avg_ts\": 4.520427,\n \"stddev_ts\": 3.246274,\n \"samples_ns\": [ 202093224473, 61936016904, 185426503306 ],\n \"samples_ts\": [ 2.53348, 8.2666, 2.7612 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:14:11Z\",\n \"avg_ns\": 37435876239,\n \"stddev_ns\": 4250329280,\n \"avg_ts\": 4.251875,\n \"stddev_ts\": 1.971981,\n \"samples_ns\": [ 64815777546, 23744364178, 23747486995 ],\n \"samples_ts\": [ 1.97483, 5.39075, 5.39004 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T05:05:21Z", - "avg_ns": 149818581561, - "stddev_ns": 2261091159, - "avg_ts": 4.520427, - "stddev_ts": 3.246274, - "samples_ns": [ - 202093224473, - 61936016904, - 185426503306 - ], - "samples_ts": [ - 2.53348, - 8.2666, - 2.7612 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T05:14:11Z", - "avg_ns": 37435876239, - "stddev_ns": 4250329280, - "avg_ts": 4.251875, - "stddev_ts": 1.971981, - "samples_ns": [ - 64815777546, - 23744364178, - 23747486995 - ], - "samples_ts": [ - 1.97483, - 5.39075, - 5.39004 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 634 - }, - { - "timestamp_utc": "2025-12-11T05:36:00.763051+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:16:05Z\",\n \"avg_ns\": 128208023593,\n \"stddev_ns\": 282319499,\n \"avg_ts\": 4.105766,\n \"stddev_ts\": 0.876242,\n \"samples_ns\": [ 100228068508, 146852444569, 137543557703 ],\n \"samples_ts\": [ 5.10835, 3.48649, 3.72246 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:25:34Z\",\n \"avg_ns\": 208816156525,\n \"stddev_ns\": 1943001344,\n \"avg_ts\": 2.505413,\n \"stddev_ts\": 0.447839,\n \"samples_ns\": [ 247983507860, 172972403431, 205492558284 ],\n \"samples_ts\": [ 2.06465, 2.96001, 2.49157 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T05:16:05Z", - "avg_ns": 128208023593, - "stddev_ns": 282319499, - "avg_ts": 4.105766, - "stddev_ts": 0.876242, - "samples_ns": [ - 100228068508, - 146852444569, - 137543557703 - ], - "samples_ts": [ - 5.10835, - 3.48649, - 3.72246 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T05:25:34Z", - "avg_ns": 208816156525, - "stddev_ns": 1943001344, - "avg_ts": 2.505413, - "stddev_ts": 0.447839, - "samples_ns": [ - 247983507860, - 172972403431, - 205492558284 - ], - "samples_ts": [ - 2.06465, - 2.96001, - 2.49157 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 635 - }, - { - "timestamp_utc": "2025-12-11T05:41:00.009530+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:36:01Z\",\n \"avg_ns\": 50479254160,\n \"stddev_ns\": 41393706,\n \"avg_ts\": 2.535696,\n \"stddev_ts\": 0.002079,\n \"samples_ns\": [ 50470849702, 50524204546, 50442708233 ],\n \"samples_ts\": [ 2.53612, 2.53344, 2.53753 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:38:57Z\",\n \"avg_ns\": 40618357628,\n \"stddev_ns\": 4177192517,\n \"avg_ts\": 4.183458,\n \"stddev_ts\": 2.137016,\n \"samples_ns\": [ 74598724221, 23641567320, 23614781343 ],\n \"samples_ts\": [ 1.71585, 5.41419, 5.42033 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T05:36:01Z", - "avg_ns": 50479254160, - "stddev_ns": 41393706, - "avg_ts": 2.535696, - "stddev_ts": 0.002079, - "samples_ns": [ - 50470849702, - 50524204546, - 50442708233 - ], - "samples_ts": [ - 2.53612, - 2.53344, - 2.53753 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T05:38:57Z", - "avg_ns": 40618357628, - "stddev_ns": 4177192517, - "avg_ts": 4.183458, - "stddev_ts": 2.137016, - "samples_ns": [ - 74598724221, - 23641567320, - 23614781343 - ], - "samples_ts": [ - 1.71585, - 5.41419, - 5.42033 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 636 - }, - { - "timestamp_utc": "2025-12-11T05:55:26.265204+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:41:00Z\",\n \"avg_ns\": 50917986730,\n \"stddev_ns\": 35882900,\n \"avg_ts\": 2.513847,\n \"stddev_ts\": 0.001771,\n \"samples_ns\": [ 50958828908, 50903604603, 50891526680 ],\n \"samples_ts\": [ 2.51183, 2.51456, 2.51515 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:43:56Z\",\n \"avg_ns\": 229678521223,\n \"stddev_ns\": 2480515687,\n \"avg_ts\": 2.280750,\n \"stddev_ts\": 0.427922,\n \"samples_ns\": [ 186759551415, 232200782896, 270075229359 ],\n \"samples_ts\": [ 2.74149, 2.20499, 1.89577 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T05:41:00Z", - "avg_ns": 50917986730, - "stddev_ns": 35882900, - "avg_ts": 2.513847, - "stddev_ts": 0.001771, - "samples_ns": [ - 50958828908, - 50903604603, - 50891526680 - ], - "samples_ts": [ - 2.51183, - 2.51456, - 2.51515 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T05:43:56Z", - "avg_ns": 229678521223, - "stddev_ns": 2480515687, - "avg_ts": 2.28075, - "stddev_ts": 0.427922, - "samples_ns": [ - 186759551415, - 232200782896, - 270075229359 - ], - "samples_ts": [ - 2.74149, - 2.20499, - 1.89577 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 637 - }, - { - "timestamp_utc": "2025-12-11T06:06:10.039277+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:55:27Z\",\n \"avg_ns\": 150244496012,\n \"stddev_ns\": 4227324849,\n \"avg_ts\": 4.582318,\n \"stddev_ts\": 3.394085,\n \"samples_ns\": [ 192174438372, 60226943230, 198332106434 ],\n \"samples_ts\": [ 2.66425, 8.50118, 2.58153 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:04:30Z\",\n \"avg_ns\": 32877976953,\n \"stddev_ns\": 4101964900,\n \"avg_ts\": 4.382379,\n \"stddev_ts\": 1.594718,\n \"samples_ns\": [ 50347366355, 23862215834, 24424348670 ],\n \"samples_ts\": [ 2.54234, 5.36413, 5.24067 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T05:55:27Z", - "avg_ns": 150244496012, - "stddev_ns": 4227324849, - "avg_ts": 4.582318, - "stddev_ts": 3.394085, - "samples_ns": [ - 192174438372, - 60226943230, - 198332106434 - ], - "samples_ts": [ - 2.66425, - 8.50118, - 2.58153 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T06:04:30Z", - "avg_ns": 32877976953, - "stddev_ns": 4101964900, - "avg_ts": 4.382379, - "stddev_ts": 1.594718, - "samples_ns": [ - 50347366355, - 23862215834, - 24424348670 - ], - "samples_ts": [ - 2.54234, - 5.36413, - 5.24067 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 638 - }, - { - "timestamp_utc": "2025-12-11T06:26:59.058208+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:06:10Z\",\n \"avg_ns\": 125825568828,\n \"stddev_ns\": 1325022270,\n \"avg_ts\": 4.437579,\n \"stddev_ts\": 1.497742,\n \"samples_ns\": [ 87493770366, 178496591024, 111486345094 ],\n \"samples_ts\": [ 5.85185, 2.8684, 4.59249 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:15:44Z\",\n \"avg_ns\": 224596510764,\n \"stddev_ns\": 975633322,\n \"avg_ts\": 2.331631,\n \"stddev_ts\": 0.416817,\n \"samples_ns\": [ 270951706130, 214436576475, 188401249688 ],\n \"samples_ts\": [ 1.88964, 2.38765, 2.7176 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T06:06:10Z", - "avg_ns": 125825568828, - "stddev_ns": 1325022270, - "avg_ts": 4.437579, - "stddev_ts": 1.497742, - "samples_ns": [ - 87493770366, - 178496591024, - 111486345094 - ], - "samples_ts": [ - 5.85185, - 2.8684, - 4.59249 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T06:15:44Z", - "avg_ns": 224596510764, - "stddev_ns": 975633322, - "avg_ts": 2.331631, - "stddev_ts": 0.416817, - "samples_ns": [ - 270951706130, - 214436576475, - 188401249688 - ], - "samples_ts": [ - 1.88964, - 2.38765, - 2.7176 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 639 - }, - { - "timestamp_utc": "2025-12-11T06:31:55.648087+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:27:00Z\",\n \"avg_ns\": 46914909371,\n \"stddev_ns\": 4107812918,\n \"avg_ts\": 2.769045,\n \"stddev_ts\": 0.429155,\n \"samples_ns\": [ 50683872148, 50851976980, 39208878986 ],\n \"samples_ts\": [ 2.52546, 2.51711, 3.26457 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:30:12Z\",\n \"avg_ns\": 34351142337,\n \"stddev_ns\": 434982969,\n \"avg_ts\": 4.351505,\n \"stddev_ts\": 1.767674,\n \"samples_ns\": [ 23719869740, 23935833472, 55397723799 ],\n \"samples_ts\": [ 5.39632, 5.34763, 2.31056 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T06:27:00Z", - "avg_ns": 46914909371, - "stddev_ns": 4107812918, - "avg_ts": 2.769045, - "stddev_ts": 0.429155, - "samples_ns": [ - 50683872148, - 50851976980, - 39208878986 - ], - "samples_ts": [ - 2.52546, - 2.51711, - 3.26457 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T06:30:12Z", - "avg_ns": 34351142337, - "stddev_ns": 434982969, - "avg_ts": 4.351505, - "stddev_ts": 1.767674, - "samples_ns": [ - 23719869740, - 23935833472, - 55397723799 - ], - "samples_ts": [ - 5.39632, - 5.34763, - 2.31056 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 640 - }, - { - "timestamp_utc": "2025-12-11T06:45:51.199405+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:31:57Z\",\n \"avg_ns\": 47681980319,\n \"stddev_ns\": 3700320709,\n \"avg_ts\": 2.711892,\n \"stddev_ts\": 0.346187,\n \"samples_ns\": [ 51004570617, 50905294951, 41136075389 ],\n \"samples_ts\": [ 2.50958, 2.51447, 3.11162 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:35:11Z\",\n \"avg_ns\": 213136154687,\n \"stddev_ns\": 3885954179,\n \"avg_ts\": 2.435895,\n \"stddev_ts\": 0.338896,\n \"samples_ns\": [ 200367218241, 189705517403, 249335728417 ],\n \"samples_ts\": [ 2.55531, 2.69892, 2.05346 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T06:31:57Z", - "avg_ns": 47681980319, - "stddev_ns": 3700320709, - "avg_ts": 2.711892, - "stddev_ts": 0.346187, - "samples_ns": [ - 51004570617, - 50905294951, - 41136075389 - ], - "samples_ts": [ - 2.50958, - 2.51447, - 3.11162 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T06:35:11Z", - "avg_ns": 213136154687, - "stddev_ns": 3885954179, - "avg_ts": 2.435895, - "stddev_ts": 0.338896, - "samples_ns": [ - 200367218241, - 189705517403, - 249335728417 - ], - "samples_ts": [ - 2.55531, - 2.69892, - 2.05346 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 641 - }, - { - "timestamp_utc": "2025-12-11T06:57:15.456355+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:45:52Z\",\n \"avg_ns\": 138668392106,\n \"stddev_ns\": 1848158513,\n \"avg_ts\": 3.796989,\n \"stddev_ts\": 0.754593,\n \"samples_ns\": [ 131838734023, 113662113271, 170504329025 ],\n \"samples_ts\": [ 3.88353, 4.50458, 3.00286 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:55:20Z\",\n \"avg_ns\": 38233950526,\n \"stddev_ns\": 4079803435,\n \"avg_ts\": 4.350320,\n \"stddev_ts\": 2.160184,\n \"samples_ns\": [ 22857324224, 22877367900, 68967159456 ],\n \"samples_ts\": [ 5.59996, 5.59505, 1.85596 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T06:45:52Z", - "avg_ns": 138668392106, - "stddev_ns": 1848158513, - "avg_ts": 3.796989, - "stddev_ts": 0.754593, - "samples_ns": [ - 131838734023, - 113662113271, - 170504329025 - ], - "samples_ts": [ - 3.88353, - 4.50458, - 3.00286 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T06:55:20Z", - "avg_ns": 38233950526, - "stddev_ns": 4079803435, - "avg_ts": 4.35032, - "stddev_ts": 2.160184, - "samples_ns": [ - 22857324224, - 22877367900, - 68967159456 - ], - "samples_ts": [ - 5.59996, - 5.59505, - 1.85596 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 642 - }, - { - "timestamp_utc": "2025-12-11T07:17:56.693672+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:57:16Z\",\n \"avg_ns\": 114519074035,\n \"stddev_ns\": 3887255302,\n \"avg_ts\": 5.693681,\n \"stddev_ts\": 2.772594,\n \"samples_ns\": [ 66457050401, 202312899732, 74787271974 ],\n \"samples_ts\": [ 7.70422, 2.53073, 6.84608 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:06:00Z\",\n \"avg_ns\": 238655835807,\n \"stddev_ns\": 3651353183,\n \"avg_ts\": 2.198358,\n \"stddev_ts\": 0.439636,\n \"samples_ns\": [ 270552245827, 255937051950, 189478209645 ],\n \"samples_ts\": [ 1.89243, 2.00049, 2.70216 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T06:57:16Z", - "avg_ns": 114519074035, - "stddev_ns": 3887255302, - "avg_ts": 5.693681, - "stddev_ts": 2.772594, - "samples_ns": [ - 66457050401, - 202312899732, - 74787271974 - ], - "samples_ts": [ - 7.70422, - 2.53073, - 6.84608 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T07:06:00Z", - "avg_ns": 238655835807, - "stddev_ns": 3651353183, - "avg_ts": 2.198358, - "stddev_ts": 0.439636, - "samples_ns": [ - 270552245827, - 255937051950, - 189478209645 - ], - "samples_ts": [ - 1.89243, - 2.00049, - 2.70216 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 643 - }, - { - "timestamp_utc": "2025-12-11T07:22:58.828622+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:17:58Z\",\n \"avg_ns\": 33289266138,\n \"stddev_ns\": 4183598919,\n \"avg_ts\": 4.967186,\n \"stddev_ts\": 3.252531,\n \"samples_ns\": [ 50601750168, 34486054097, 14779994149 ],\n \"samples_ts\": [ 2.52956, 3.71165, 8.66036 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:20:28Z\",\n \"avg_ns\": 49833720900,\n \"stddev_ns\": 3506003057,\n \"avg_ts\": 3.466534,\n \"stddev_ts\": 1.978609,\n \"samples_ns\": [ 23846706548, 35423695526, 90230760626 ],\n \"samples_ts\": [ 5.36762, 3.6134, 1.41858 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T07:17:58Z", - "avg_ns": 33289266138, - "stddev_ns": 4183598919, - "avg_ts": 4.967186, - "stddev_ts": 3.252531, - "samples_ns": [ - 50601750168, - 34486054097, - 14779994149 - ], - "samples_ts": [ - 2.52956, - 3.71165, - 8.66036 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T07:20:28Z", - "avg_ns": 49833720900, - "stddev_ns": 3506003057, - "avg_ts": 3.466534, - "stddev_ts": 1.978609, - "samples_ns": [ - 23846706548, - 35423695526, - 90230760626 - ], - "samples_ts": [ - 5.36762, - 3.6134, - 1.41858 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 644 - }, - { - "timestamp_utc": "2025-12-11T07:36:10.978509+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:23:00Z\",\n \"avg_ns\": 32826261882,\n \"stddev_ns\": 4231125906,\n \"avg_ts\": 5.014688,\n \"stddev_ts\": 3.206875,\n \"samples_ns\": [ 50730696093, 32920985162, 14827104391 ],\n \"samples_ts\": [ 2.52313, 3.8881, 8.63284 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:25:29Z\",\n \"avg_ns\": 213675429243,\n \"stddev_ns\": 4061683880,\n \"avg_ts\": 2.434074,\n \"stddev_ts\": 0.358611,\n \"samples_ns\": [ 252328629090, 188833428838, 199864229802 ],\n \"samples_ts\": [ 2.0291, 2.71138, 2.56174 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T07:23:00Z", - "avg_ns": 32826261882, - "stddev_ns": 4231125906, - "avg_ts": 5.014688, - "stddev_ts": 3.206875, - "samples_ns": [ - 50730696093, - 32920985162, - 14827104391 - ], - "samples_ts": [ - 2.52313, - 3.8881, - 8.63284 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T07:25:29Z", - "avg_ns": 213675429243, - "stddev_ns": 4061683880, - "avg_ts": 2.434074, - "stddev_ts": 0.358611, - "samples_ns": [ - 252328629090, - 188833428838, - 199864229802 - ], - "samples_ts": [ - 2.0291, - 2.71138, - 2.56174 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 645 - }, - { - "timestamp_utc": "2025-12-11T07:48:45.153392+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:36:11Z\",\n \"avg_ns\": 123722291827,\n \"stddev_ns\": 2026332419,\n \"avg_ts\": 4.423013,\n \"stddev_ts\": 1.408796,\n \"samples_ns\": [ 86223168176, 162410189263, 122533518042 ],\n \"samples_ts\": [ 5.93808, 3.15251, 4.17845 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:45:40Z\",\n \"avg_ns\": 61263382270,\n \"stddev_ns\": 3946714855,\n \"avg_ts\": 2.879556,\n \"stddev_ts\": 2.176333,\n \"samples_ns\": [ 23792569719, 69253942440, 90743634651 ],\n \"samples_ts\": [ 5.37983, 1.84827, 1.41057 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T07:36:11Z", - "avg_ns": 123722291827, - "stddev_ns": 2026332419, - "avg_ts": 4.423013, - "stddev_ts": 1.408796, - "samples_ns": [ - 86223168176, - 162410189263, - 122533518042 - ], - "samples_ts": [ - 5.93808, - 3.15251, - 4.17845 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T07:45:40Z", - "avg_ns": 61263382270, - "stddev_ns": 3946714855, - "avg_ts": 2.879556, - "stddev_ts": 2.176333, - "samples_ns": [ - 23792569719, - 69253942440, - 90743634651 - ], - "samples_ts": [ - 5.37983, - 1.84827, - 1.41057 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 646 - }, - { - "timestamp_utc": "2025-12-11T08:09:20.012129+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:48:46Z\",\n \"avg_ns\": 126728951964,\n \"stddev_ns\": 3476298254,\n \"avg_ts\": 4.198630,\n \"stddev_ts\": 1.044626,\n \"samples_ns\": [ 132065922574, 152710522352, 95410410968 ],\n \"samples_ts\": [ 3.87685, 3.35275, 5.36629 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:57:03Z\",\n \"avg_ns\": 245121915567,\n \"stddev_ns\": 1521927640,\n \"avg_ts\": 2.098638,\n \"stddev_ts\": 0.172235,\n \"samples_ns\": [ 231633918286, 269432745682, 234299082735 ],\n \"samples_ts\": [ 2.21038, 1.90029, 2.18524 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T07:48:46Z", - "avg_ns": 126728951964, - "stddev_ns": 3476298254, - "avg_ts": 4.19863, - "stddev_ts": 1.044626, - "samples_ns": [ - 132065922574, - 152710522352, - 95410410968 - ], - "samples_ts": [ - 3.87685, - 3.35275, - 5.36629 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_type": "gemma3 4B Q2_K - Medium", - "model_size": 1722623232, - "model_n_params": 3880263168, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T07:57:03Z", - "avg_ns": 245121915567, - "stddev_ns": 1521927640, - "avg_ts": 2.098638, - "stddev_ts": 0.172235, - "samples_ns": [ - 231633918286, - 269432745682, - 234299082735 - ], - "samples_ts": [ - 2.21038, - 1.90029, - 2.18524 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-4B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 647 - }, - { - "timestamp_utc": "2025-12-11T08:26:35.911272+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T08:09:57Z\",\n \"avg_ns\": 128702126225,\n \"stddev_ns\": 4175189081,\n \"avg_ts\": 1.002694,\n \"stddev_ts\": 0.108400,\n \"samples_ns\": [ 144953224225, 124188969367, 116964185083 ],\n \"samples_ts\": [ 0.883043, 1.03069, 1.09435 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T08:18:05Z\",\n \"avg_ns\": 169642853490,\n \"stddev_ns\": 2369381036,\n \"avg_ts\": 0.756759,\n \"stddev_ts\": 0.050736,\n \"samples_ns\": [ 180017827169, 171147731220, 157763002083 ],\n \"samples_ts\": [ 0.711041, 0.747892, 0.811344 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T08:09:57Z", - "avg_ns": 128702126225, - "stddev_ns": 4175189081, - "avg_ts": 1.002694, - "stddev_ts": 0.1084, - "samples_ns": [ - 144953224225, - 124188969367, - 116964185083 - ], - "samples_ts": [ - 0.883043, - 1.03069, - 1.09435 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T08:18:05Z", - "avg_ns": 169642853490, - "stddev_ns": 2369381036, - "avg_ts": 0.756759, - "stddev_ts": 0.050736, - "samples_ns": [ - 180017827169, - 171147731220, - 157763002083 - ], - "samples_ts": [ - 0.711041, - 0.747892, - 0.811344 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 648 - }, - { - "timestamp_utc": "2025-12-11T09:10:59.503908+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T08:26:37Z\",\n \"avg_ns\": 131381880652,\n \"stddev_ns\": 2631910218,\n \"avg_ts\": 0.988409,\n \"stddev_ts\": 0.142520,\n \"samples_ns\": [ 127294046356, 114124180955, 152727414646 ],\n \"samples_ts\": [ 1.00555, 1.12159, 0.838094 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T08:35:30Z\",\n \"avg_ns\": 709135124910,\n \"stddev_ns\": 4294193782,\n \"avg_ts\": 0.722633,\n \"stddev_ts\": 0.026064,\n \"samples_ns\": [ 683825831211, 734997061770, 708582481750 ],\n \"samples_ts\": [ 0.748729, 0.696601, 0.722569 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T08:26:37Z", - "avg_ns": 131381880652, - "stddev_ns": 2631910218, - "avg_ts": 0.988409, - "stddev_ts": 0.14252, - "samples_ns": [ - 127294046356, - 114124180955, - 152727414646 - ], - "samples_ts": [ - 1.00555, - 1.12159, - 0.838094 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T08:35:30Z", - "avg_ns": 709135124910, - "stddev_ns": 4294193782, - "avg_ts": 0.722633, - "stddev_ts": 0.026064, - "samples_ns": [ - 683825831211, - 734997061770, - 708582481750 - ], - "samples_ts": [ - 0.748729, - 0.696601, - 0.722569 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 649 - }, - { - "timestamp_utc": "2025-12-11T09:53:29.428687+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T09:11:00Z\",\n \"avg_ns\": 513645990250,\n \"stddev_ns\": 1657075855,\n \"avg_ts\": 0.998439,\n \"stddev_ts\": 0.050084,\n \"samples_ns\": [ 485701928294, 520303267508, 534932774948 ],\n \"samples_ts\": [ 1.05414, 0.984041, 0.95713 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T09:45:28Z\",\n \"avg_ns\": 159997195438,\n \"stddev_ns\": 2109821286,\n \"avg_ts\": 0.812608,\n \"stddev_ts\": 0.122489,\n \"samples_ns\": [ 137531306776, 186548650960, 155911628579 ],\n \"samples_ts\": [ 0.930697, 0.686148, 0.820978 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T09:11:00Z", - "avg_ns": 513645990250, - "stddev_ns": 1657075855, - "avg_ts": 0.998439, - "stddev_ts": 0.050084, - "samples_ns": [ - 485701928294, - 520303267508, - 534932774948 - ], - "samples_ts": [ - 1.05414, - 0.984041, - 0.95713 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T09:45:28Z", - "avg_ns": 159997195438, - "stddev_ns": 2109821286, - "avg_ts": 0.812608, - "stddev_ts": 0.122489, - "samples_ns": [ - 137531306776, - 186548650960, - 155911628579 - ], - "samples_ts": [ - 0.930697, - 0.686148, - 0.820978 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 650 - }, - { - "timestamp_utc": "2025-12-11T11:03:13.864572+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T09:53:31Z\",\n \"avg_ns\": 511640724724,\n \"stddev_ns\": 3815490425,\n \"avg_ts\": 1.002264,\n \"stddev_ts\": 0.048742,\n \"samples_ns\": [ 534172135170, 515345170971, 485404868031 ],\n \"samples_ts\": [ 0.958493, 0.993509, 1.05479 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T10:27:29Z\",\n \"avg_ns\": 714188458371,\n \"stddev_ns\": 4287179111,\n \"avg_ts\": 0.717880,\n \"stddev_ts\": 0.032312,\n \"samples_ns\": [ 749659248510, 685636953586, 707269173018 ],\n \"samples_ts\": [ 0.682977, 0.746751, 0.723911 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T09:53:31Z", - "avg_ns": 511640724724, - "stddev_ns": 3815490425, - "avg_ts": 1.002264, - "stddev_ts": 0.048742, - "samples_ns": [ - 534172135170, - 515345170971, - 485404868031 - ], - "samples_ts": [ - 0.958493, - 0.993509, - 1.05479 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T10:27:29Z", - "avg_ns": 714188458371, - "stddev_ns": 4287179111, - "avg_ts": 0.71788, - "stddev_ts": 0.032312, - "samples_ns": [ - 749659248510, - 685636953586, - 707269173018 - ], - "samples_ts": [ - 0.682977, - 0.746751, - 0.723911 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 651 - }, - { - "timestamp_utc": "2025-12-11T11:20:33.087838+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T11:03:15Z\",\n \"avg_ns\": 120920870097,\n \"stddev_ns\": 3807252233,\n \"avg_ts\": 1.136627,\n \"stddev_ts\": 0.332620,\n \"samples_ns\": [ 96274888157, 170087916182, 96399805953 ],\n \"samples_ts\": [ 1.32953, 0.752552, 1.3278 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T11:11:43Z\",\n \"avg_ns\": 176085529551,\n \"stddev_ns\": 1832872339,\n \"avg_ts\": 0.760377,\n \"stddev_ts\": 0.210354,\n \"samples_ns\": [ 199428097208, 127585359217, 201243132229 ],\n \"samples_ts\": [ 0.641835, 1.00325, 0.636047 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T11:03:15Z", - "avg_ns": 120920870097, - "stddev_ns": 3807252233, - "avg_ts": 1.136627, - "stddev_ts": 0.33262, - "samples_ns": [ - 96274888157, - 170087916182, - 96399805953 - ], - "samples_ts": [ - 1.32953, - 0.752552, - 1.3278 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T11:11:43Z", - "avg_ns": 176085529551, - "stddev_ns": 1832872339, - "avg_ts": 0.760377, - "stddev_ts": 0.210354, - "samples_ns": [ - 199428097208, - 127585359217, - 201243132229 - ], - "samples_ts": [ - 0.641835, - 1.00325, - 0.636047 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 652 - }, - { - "timestamp_utc": "2025-12-11T12:04:19.201657+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T11:20:34Z\",\n \"avg_ns\": 134925043896,\n \"stddev_ns\": 4235108875,\n \"avg_ts\": 0.987433,\n \"stddev_ts\": 0.248287,\n \"samples_ns\": [ 164933290030, 101517410298, 138324431362 ],\n \"samples_ts\": [ 0.776071, 1.26087, 0.925361 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T11:28:55Z\",\n \"avg_ns\": 707262028836,\n \"stddev_ns\": 3832456312,\n \"avg_ts\": 0.724624,\n \"stddev_ts\": 0.027827,\n \"samples_ns\": [ 711164100209, 678600176127, 732021810174 ],\n \"samples_ts\": [ 0.719946, 0.754494, 0.699433 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T11:20:34Z", - "avg_ns": 134925043896, - "stddev_ns": 4235108875, - "avg_ts": 0.987433, - "stddev_ts": 0.248287, - "samples_ns": [ - 164933290030, - 101517410298, - 138324431362 - ], - "samples_ts": [ - 0.776071, - 1.26087, - 0.925361 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T11:28:55Z", - "avg_ns": 707262028836, - "stddev_ns": 3832456312, - "avg_ts": 0.724624, - "stddev_ts": 0.027827, - "samples_ns": [ - 711164100209, - 678600176127, - 732021810174 - ], - "samples_ts": [ - 0.719946, - 0.754494, - 0.699433 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 653 - }, - { - "timestamp_utc": "2025-12-11T12:46:38.933281+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T12:04:21Z\",\n \"avg_ns\": 517968325646,\n \"stddev_ns\": 2176125244,\n \"avg_ts\": 0.990170,\n \"stddev_ts\": 0.050838,\n \"samples_ns\": [ 530120867549, 535515244598, 488268864792 ],\n \"samples_ts\": [ 0.965817, 0.956089, 1.0486 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T12:38:20Z\",\n \"avg_ns\": 165342210986,\n \"stddev_ns\": 3839120142,\n \"avg_ts\": 0.776994,\n \"stddev_ts\": 0.058773,\n \"samples_ns\": [ 171359773715, 173136566015, 151530293229 ],\n \"samples_ts\": [ 0.746966, 0.739301, 0.844716 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T12:04:21Z", - "avg_ns": 517968325646, - "stddev_ns": 2176125244, - "avg_ts": 0.99017, - "stddev_ts": 0.050838, - "samples_ns": [ - 530120867549, - 535515244598, - 488268864792 - ], - "samples_ts": [ - 0.965817, - 0.956089, - 1.0486 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T12:38:20Z", - "avg_ns": 165342210986, - "stddev_ns": 3839120142, - "avg_ts": 0.776994, - "stddev_ts": 0.058773, - "samples_ns": [ - 171359773715, - 173136566015, - 151530293229 - ], - "samples_ts": [ - 0.746966, - 0.739301, - 0.844716 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 654 - }, - { - "timestamp_utc": "2025-12-11T13:55:18.908620+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T12:46:40Z\",\n \"avg_ns\": 509722745157,\n \"stddev_ns\": 2726071503,\n \"avg_ts\": 1.005776,\n \"stddev_ts\": 0.044099,\n \"samples_ns\": [ 503997196418, 490446628496, 534724410558 ],\n \"samples_ts\": [ 1.01588, 1.04395, 0.957503 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T13:21:03Z\",\n \"avg_ns\": 684638533145,\n \"stddev_ns\": 4251035505,\n \"avg_ts\": 0.748369,\n \"stddev_ts\": 0.024607,\n \"samples_ns\": [ 659187071577, 695742350001, 698986177857 ],\n \"samples_ts\": [ 0.776714, 0.735905, 0.732489 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T12:46:40Z", - "avg_ns": 509722745157, - "stddev_ns": 2726071503, - "avg_ts": 1.005776, - "stddev_ts": 0.044099, - "samples_ns": [ - 503997196418, - 490446628496, - 534724410558 - ], - "samples_ts": [ - 1.01588, - 1.04395, - 0.957503 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T13:21:03Z", - "avg_ns": 684638533145, - "stddev_ns": 4251035505, - "avg_ts": 0.748369, - "stddev_ts": 0.024607, - "samples_ns": [ - 659187071577, - 695742350001, - 698986177857 - ], - "samples_ts": [ - 0.776714, - 0.735905, - 0.732489 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 655 - }, - { - "timestamp_utc": "2025-12-11T14:11:47.346775+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T13:55:19Z\",\n \"avg_ns\": 131300331478,\n \"stddev_ns\": 3742408715,\n \"avg_ts\": 0.990740,\n \"stddev_ts\": 0.151486,\n \"samples_ns\": [ 153660382505, 112766500609, 127474111320 ],\n \"samples_ts\": [ 0.833006, 1.13509, 1.00413 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:03:30Z\",\n \"avg_ns\": 165051237166,\n \"stddev_ns\": 2295343619,\n \"avg_ts\": 0.781079,\n \"stddev_ts\": 0.082106,\n \"samples_ns\": [ 168295517107, 179984330478, 146873863914 ],\n \"samples_ts\": [ 0.760567, 0.711173, 0.871496 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T13:55:19Z", - "avg_ns": 131300331478, - "stddev_ns": 3742408715, - "avg_ts": 0.99074, - "stddev_ts": 0.151486, - "samples_ns": [ - 153660382505, - 112766500609, - 127474111320 - ], - "samples_ts": [ - 0.833006, - 1.13509, - 1.00413 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T14:03:30Z", - "avg_ns": 165051237166, - "stddev_ns": 2295343619, - "avg_ts": 0.781079, - "stddev_ts": 0.082106, - "samples_ns": [ - 168295517107, - 179984330478, - 146873863914 - ], - "samples_ts": [ - 0.760567, - 0.711173, - 0.871496 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 656 - }, - { - "timestamp_utc": "2025-12-11T14:55:30.841378+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:11:48Z\",\n \"avg_ns\": 128322107623,\n \"stddev_ns\": 3721064131,\n \"avg_ts\": 1.005110,\n \"stddev_ts\": 0.104180,\n \"samples_ns\": [ 118614749714, 121878036335, 144473536822 ],\n \"samples_ts\": [ 1.07912, 1.05023, 0.885975 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:20:41Z\",\n \"avg_ns\": 695949524315,\n \"stddev_ns\": 2209763256,\n \"avg_ts\": 0.736032,\n \"stddev_ts\": 0.019704,\n \"samples_ns\": [ 674823269329, 705034322257, 707990981361 ],\n \"samples_ts\": [ 0.758717, 0.726206, 0.723173 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T14:11:48Z", - "avg_ns": 128322107623, - "stddev_ns": 3721064131, - "avg_ts": 1.00511, - "stddev_ts": 0.10418, - "samples_ns": [ - 118614749714, - 121878036335, - 144473536822 - ], - "samples_ts": [ - 1.07912, - 1.05023, - 0.885975 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T14:20:41Z", - "avg_ns": 695949524315, - "stddev_ns": 2209763256, - "avg_ts": 0.736032, - "stddev_ts": 0.019704, - "samples_ns": [ - 674823269329, - 705034322257, - 707990981361 - ], - "samples_ts": [ - 0.758717, - 0.726206, - 0.723173 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 657 - }, - { - "timestamp_utc": "2025-12-11T15:37:35.568382+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:55:31Z\",\n \"avg_ns\": 519670420991,\n \"stddev_ns\": 4056098893,\n \"avg_ts\": 0.986617,\n \"stddev_ts\": 0.045714,\n \"samples_ns\": [ 492721372935, 535737072143, 530552817895 ],\n \"samples_ts\": [ 1.03913, 0.955693, 0.965031 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T15:29:56Z\",\n \"avg_ns\": 152323233429,\n \"stddev_ns\": 2268695219,\n \"avg_ts\": 0.874929,\n \"stddev_ts\": 0.198847,\n \"samples_ns\": [ 127784821213, 198218966870, 130965912206 ],\n \"samples_ts\": [ 1.00168, 0.645751, 0.977354 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T14:55:31Z", - "avg_ns": 519670420991, - "stddev_ns": 4056098893, - "avg_ts": 0.986617, - "stddev_ts": 0.045714, - "samples_ns": [ - 492721372935, - 535737072143, - 530552817895 - ], - "samples_ts": [ - 1.03913, - 0.955693, - 0.965031 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T15:29:56Z", - "avg_ns": 152323233429, - "stddev_ns": 2268695219, - "avg_ts": 0.874929, - "stddev_ts": 0.198847, - "samples_ns": [ - 127784821213, - 198218966870, - 130965912206 - ], - "samples_ts": [ - 1.00168, - 0.645751, - 0.977354 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 658 - }, - { - "timestamp_utc": "2025-12-11T16:45:24.295728+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T15:37:37Z\",\n \"avg_ns\": 511389065987,\n \"stddev_ns\": 3327876219,\n \"avg_ts\": 1.002350,\n \"stddev_ts\": 0.041177,\n \"samples_ns\": [ 536233478163, 498672959138, 499260760660 ],\n \"samples_ts\": [ 0.954808, 1.02673, 1.02552 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T16:12:01Z\",\n \"avg_ns\": 667157147430,\n \"stddev_ns\": 3689552768,\n \"avg_ts\": 0.767514,\n \"stddev_ts\": 0.009486,\n \"samples_ns\": [ 662787633835, 676733354947, 661950453510 ],\n \"samples_ts\": [ 0.772495, 0.756576, 0.773472 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T15:37:37Z", - "avg_ns": 511389065987, - "stddev_ns": 3327876219, - "avg_ts": 1.00235, - "stddev_ts": 0.041177, - "samples_ns": [ - 536233478163, - 498672959138, - 499260760660 - ], - "samples_ts": [ - 0.954808, - 1.02673, - 1.02552 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T16:12:01Z", - "avg_ns": 667157147430, - "stddev_ns": 3689552768, - "avg_ts": 0.767514, - "stddev_ts": 0.009486, - "samples_ns": [ - 662787633835, - 676733354947, - 661950453510 - ], - "samples_ts": [ - 0.772495, - 0.756576, - 0.773472 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 659 - }, - { - "timestamp_utc": "2025-12-11T17:01:33.912045+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T16:45:25Z\",\n \"avg_ns\": 132017008472,\n \"stddev_ns\": 4165598947,\n \"avg_ts\": 0.990677,\n \"stddev_ts\": 0.177315,\n \"samples_ns\": [ 156503115530, 109197845764, 130350064124 ],\n \"samples_ts\": [ 0.817875, 1.17218, 0.981971 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T16:53:37Z\",\n \"avg_ns\": 158263813611,\n \"stddev_ns\": 4165922581,\n \"avg_ts\": 0.810972,\n \"stddev_ts\": 0.052053,\n \"samples_ns\": [ 159556187072, 167573157367, 147662096395 ],\n \"samples_ts\": [ 0.802225, 0.763845, 0.866844 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T16:45:25Z", - "avg_ns": 132017008472, - "stddev_ns": 4165598947, - "avg_ts": 0.990677, - "stddev_ts": 0.177315, - "samples_ns": [ - 156503115530, - 109197845764, - 130350064124 - ], - "samples_ts": [ - 0.817875, - 1.17218, - 0.981971 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T16:53:37Z", - "avg_ns": 158263813611, - "stddev_ns": 4165922581, - "avg_ts": 0.810972, - "stddev_ts": 0.052053, - "samples_ns": [ - 159556187072, - 167573157367, - 147662096395 - ], - "samples_ts": [ - 0.802225, - 0.763845, - 0.866844 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 660 - }, - { - "timestamp_utc": "2025-12-11T17:43:04.187827+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T17:01:34Z\",\n \"avg_ns\": 133645263819,\n \"stddev_ns\": 4123240685,\n \"avg_ts\": 0.971518,\n \"stddev_ts\": 0.139150,\n \"samples_ns\": [ 129255037036, 116328744816, 155352009606 ],\n \"samples_ts\": [ 0.99029, 1.10033, 0.823935 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T17:10:33Z\",\n \"avg_ns\": 649913717192,\n \"stddev_ns\": 1563606119,\n \"avg_ts\": 0.787846,\n \"stddev_ts\": 0.007580,\n \"samples_ns\": [ 648473588815, 644487003828, 656780558934 ],\n \"samples_ts\": [ 0.789546, 0.79443, 0.77956 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T17:01:34Z", - "avg_ns": 133645263819, - "stddev_ns": 4123240685, - "avg_ts": 0.971518, - "stddev_ts": 0.13915, - "samples_ns": [ - 129255037036, - 116328744816, - 155352009606 - ], - "samples_ts": [ - 0.99029, - 1.10033, - 0.823935 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T17:10:33Z", - "avg_ns": 649913717192, - "stddev_ns": 1563606119, - "avg_ts": 0.787846, - "stddev_ts": 0.00758, - "samples_ns": [ - 648473588815, - 644487003828, - 656780558934 - ], - "samples_ts": [ - 0.789546, - 0.79443, - 0.77956 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 661 - }, - { - "timestamp_utc": "2025-12-11T18:25:44.897029+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T17:43:06Z\",\n \"avg_ns\": 509829805709,\n \"stddev_ns\": 3534471142,\n \"avg_ts\": 1.005313,\n \"stddev_ts\": 0.039500,\n \"samples_ns\": [ 533215449962, 500992607513, 495281359652 ],\n \"samples_ts\": [ 0.960212, 1.02197, 1.03376 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:17:08Z\",\n \"avg_ns\": 171703742600,\n \"stddev_ns\": 1542966209,\n \"avg_ts\": 0.772387,\n \"stddev_ts\": 0.188233,\n \"samples_ns\": [ 196890048829, 129404463554, 188816715418 ],\n \"samples_ts\": [ 0.650109, 0.989147, 0.677906 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T17:43:06Z", - "avg_ns": 509829805709, - "stddev_ns": 3534471142, - "avg_ts": 1.005313, - "stddev_ts": 0.0395, - "samples_ns": [ - 533215449962, - 500992607513, - 495281359652 - ], - "samples_ts": [ - 0.960212, - 1.02197, - 1.03376 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T18:17:08Z", - "avg_ns": 171703742600, - "stddev_ns": 1542966209, - "avg_ts": 0.772387, - "stddev_ts": 0.188233, - "samples_ns": [ - 196890048829, - 129404463554, - 188816715418 - ], - "samples_ts": [ - 0.650109, - 0.989147, - 0.677906 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 662 - }, - { - "timestamp_utc": "2025-12-11T19:31:03.653562+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:25:46Z\",\n \"avg_ns\": 515277420197,\n \"stddev_ns\": 4272980258,\n \"avg_ts\": 0.995759,\n \"stddev_ts\": 0.057184,\n \"samples_ns\": [ 482224373113, 532950148933, 530657738545 ],\n \"samples_ts\": [ 1.06175, 0.96069, 0.96484 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:00:03Z\",\n \"avg_ns\": 619530525395,\n \"stddev_ns\": 1597316358,\n \"avg_ts\": 0.826436,\n \"stddev_ts\": 0.002129,\n \"samples_ns\": [ 621299774329, 619097273248, 618194528610 ],\n \"samples_ts\": [ 0.824079, 0.827011, 0.828218 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T18:25:46Z", - "avg_ns": 515277420197, - "stddev_ns": 4272980258, - "avg_ts": 0.995759, - "stddev_ts": 0.057184, - "samples_ns": [ - 482224373113, - 532950148933, - 530657738545 - ], - "samples_ts": [ - 1.06175, - 0.96069, - 0.96484 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T19:00:03Z", - "avg_ns": 619530525395, - "stddev_ns": 1597316358, - "avg_ts": 0.826436, - "stddev_ts": 0.002129, - "samples_ns": [ - 621299774329, - 619097273248, - 618194528610 - ], - "samples_ts": [ - 0.824079, - 0.827011, - 0.828218 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 663 - }, - { - "timestamp_utc": "2025-12-11T19:47:12.458216+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:31:04Z\",\n \"avg_ns\": 136537663098,\n \"stddev_ns\": 4158418861,\n \"avg_ts\": 0.990096,\n \"stddev_ts\": 0.302535,\n \"samples_ns\": [ 152981543816, 95622143329, 161009302149 ],\n \"samples_ts\": [ 0.836702, 1.3386, 0.794985 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:39:47Z\",\n \"avg_ns\": 147655052480,\n \"stddev_ns\": 3521416996,\n \"avg_ts\": 0.907500,\n \"stddev_ts\": 0.219740,\n \"samples_ns\": [ 128503290938, 194737442534, 119724423969 ],\n \"samples_ts\": [ 0.996083, 0.657295, 1.06912 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T19:31:04Z", - "avg_ns": 136537663098, - "stddev_ns": 4158418861, - "avg_ts": 0.990096, - "stddev_ts": 0.302535, - "samples_ns": [ - 152981543816, - 95622143329, - 161009302149 - ], - "samples_ts": [ - 0.836702, - 1.3386, - 0.794985 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T19:39:47Z", - "avg_ns": 147655052480, - "stddev_ns": 3521416996, - "avg_ts": 0.9075, - "stddev_ts": 0.21974, - "samples_ns": [ - 128503290938, - 194737442534, - 119724423969 - ], - "samples_ts": [ - 0.996083, - 0.657295, - 1.06912 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 664 - }, - { - "timestamp_utc": "2025-12-11T20:28:02.825052+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:47:13Z\",\n \"avg_ns\": 121744043787,\n \"stddev_ns\": 1034136466,\n \"avg_ts\": 1.072640,\n \"stddev_ts\": 0.190873,\n \"samples_ns\": [ 99597682324, 139765259149, 125869189889 ],\n \"samples_ts\": [ 1.28517, 0.915821, 1.01693 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:56:05Z\",\n \"avg_ns\": 638597984215,\n \"stddev_ns\": 3852317935,\n \"avg_ts\": 0.801919,\n \"stddev_ts\": 0.013929,\n \"samples_ns\": [ 631658751886, 632612120049, 651523080711 ],\n \"samples_ts\": [ 0.810564, 0.809343, 0.785851 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T19:47:13Z", - "avg_ns": 121744043787, - "stddev_ns": 1034136466, - "avg_ts": 1.07264, - "stddev_ts": 0.190873, - "samples_ns": [ - 99597682324, - 139765259149, - 125869189889 - ], - "samples_ts": [ - 1.28517, - 0.915821, - 1.01693 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T19:56:05Z", - "avg_ns": 638597984215, - "stddev_ns": 3852317935, - "avg_ts": 0.801919, - "stddev_ts": 0.013929, - "samples_ns": [ - 631658751886, - 632612120049, - 651523080711 - ], - "samples_ts": [ - 0.810564, - 0.809343, - 0.785851 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 665 - }, - { - "timestamp_utc": "2025-12-11T21:10:40.080125+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T20:28:04Z\",\n \"avg_ns\": 508599877268,\n \"stddev_ns\": 4284673799,\n \"avg_ts\": 1.007927,\n \"stddev_ts\": 0.042870,\n \"samples_ns\": [ 533794096001, 499564917503, 492440618301 ],\n \"samples_ts\": [ 0.959171, 1.02489, 1.03972 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:02:03Z\",\n \"avg_ns\": 171639398009,\n \"stddev_ns\": 891310525,\n \"avg_ts\": 0.778833,\n \"stddev_ts\": 0.211376,\n \"samples_ns\": [ 196537570595, 125143745602, 193236877831 ],\n \"samples_ts\": [ 0.651275, 1.02282, 0.662399 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T20:28:04Z", - "avg_ns": 508599877268, - "stddev_ns": 4284673799, - "avg_ts": 1.007927, - "stddev_ts": 0.04287, - "samples_ns": [ - 533794096001, - 499564917503, - 492440618301 - ], - "samples_ts": [ - 0.959171, - 1.02489, - 1.03972 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T21:02:03Z", - "avg_ns": 171639398009, - "stddev_ns": 891310525, - "avg_ts": 0.778833, - "stddev_ts": 0.211376, - "samples_ns": [ - 196537570595, - 125143745602, - 193236877831 - ], - "samples_ts": [ - 0.651275, - 1.02282, - 0.662399 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 666 - }, - { - "timestamp_utc": "2025-12-11T22:17:44.546004+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:10:41Z\",\n \"avg_ns\": 516447022332,\n \"stddev_ns\": 605810764,\n \"avg_ts\": 0.993181,\n \"stddev_ts\": 0.052421,\n \"samples_ns\": [ 486012343833, 534241382909, 529087340256 ],\n \"samples_ts\": [ 1.05347, 0.958368, 0.967704 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:44:58Z\",\n \"avg_ns\": 654540343903,\n \"stddev_ns\": 3879268703,\n \"avg_ts\": 0.782325,\n \"stddev_ts\": 0.010632,\n \"samples_ns\": [ 646391237885, 653155374778, 664074419048 ],\n \"samples_ts\": [ 0.79209, 0.783887, 0.770998 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T21:10:41Z", - "avg_ns": 516447022332, - "stddev_ns": 605810764, - "avg_ts": 0.993181, - "stddev_ts": 0.052421, - "samples_ns": [ - 486012343833, - 534241382909, - 529087340256 - ], - "samples_ts": [ - 1.05347, - 0.958368, - 0.967704 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T21:44:58Z", - "avg_ns": 654540343903, - "stddev_ns": 3879268703, - "avg_ts": 0.782325, - "stddev_ts": 0.010632, - "samples_ns": [ - 646391237885, - 653155374778, - 664074419048 - ], - "samples_ts": [ - 0.79209, - 0.783887, - 0.770998 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 667 - }, - { - "timestamp_utc": "2025-12-11T22:34:59.724816+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:17:46Z\",\n \"avg_ns\": 120850213179,\n \"stddev_ns\": 1851761473,\n \"avg_ts\": 1.106124,\n \"stddev_ts\": 0.265855,\n \"samples_ns\": [ 95917024374, 157195642576, 109437972587 ],\n \"samples_ts\": [ 1.33449, 0.814272, 1.16961 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:26:27Z\",\n \"avg_ns\": 170193251282,\n \"stddev_ns\": 950050981,\n \"avg_ts\": 0.765818,\n \"stddev_ts\": 0.130785,\n \"samples_ns\": [ 179573364849, 139923207836, 191083181162 ],\n \"samples_ts\": [ 0.712801, 0.914787, 0.669865 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T22:17:46Z", - "avg_ns": 120850213179, - "stddev_ns": 1851761473, - "avg_ts": 1.106124, - "stddev_ts": 0.265855, - "samples_ns": [ - 95917024374, - 157195642576, - 109437972587 - ], - "samples_ts": [ - 1.33449, - 0.814272, - 1.16961 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T22:26:27Z", - "avg_ns": 170193251282, - "stddev_ns": 950050981, - "avg_ts": 0.765818, - "stddev_ts": 0.130785, - "samples_ns": [ - 179573364849, - 139923207836, - 191083181162 - ], - "samples_ts": [ - 0.712801, - 0.914787, - 0.669865 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 668 - }, - { - "timestamp_utc": "2025-12-11T23:15:55.064342+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:35:01Z\",\n \"avg_ns\": 126768376177,\n \"stddev_ns\": 548898757,\n \"avg_ts\": 1.017004,\n \"stddev_ts\": 0.104786,\n \"samples_ns\": [ 140773219253, 125099212203, 114432697076 ],\n \"samples_ts\": [ 0.909264, 1.02319, 1.11856 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:43:01Z\",\n \"avg_ns\": 657198611192,\n \"stddev_ns\": 3778619039,\n \"avg_ts\": 0.779204,\n \"stddev_ts\": 0.012836,\n \"samples_ns\": [ 666180317565, 660139565765, 645275950247 ],\n \"samples_ts\": [ 0.768561, 0.775594, 0.793459 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T22:35:01Z", - "avg_ns": 126768376177, - "stddev_ns": 548898757, - "avg_ts": 1.017004, - "stddev_ts": 0.104786, - "samples_ns": [ - 140773219253, - 125099212203, - 114432697076 - ], - "samples_ts": [ - 0.909264, - 1.02319, - 1.11856 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-11T22:43:01Z", - "avg_ns": 657198611192, - "stddev_ns": 3778619039, - "avg_ts": 0.779204, - "stddev_ts": 0.012836, - "samples_ns": [ - 666180317565, - 660139565765, - 645275950247 - ], - "samples_ts": [ - 0.768561, - 0.775594, - 0.793459 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 669 - }, - { - "timestamp_utc": "2025-12-11T23:58:24.880887+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:15:56Z\",\n \"avg_ns\": 517536368062,\n \"stddev_ns\": 1312013597,\n \"avg_ts\": 0.990763,\n \"stddev_ts\": 0.046986,\n \"samples_ns\": [ 491042162842, 523277697206, 538289244138 ],\n \"samples_ts\": [ 1.04268, 0.978448, 0.951161 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:50:35Z\",\n \"avg_ns\": 155884800607,\n \"stddev_ns\": 4180179800,\n \"avg_ts\": 0.832168,\n \"stddev_ts\": 0.115090,\n \"samples_ns\": [ 136737087836, 180668405266, 150248908721 ],\n \"samples_ts\": [ 0.936103, 0.70848, 0.85192 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T23:15:56Z", - "avg_ns": 517536368062, - "stddev_ns": 1312013597, - "avg_ts": 0.990763, - "stddev_ts": 0.046986, - "samples_ns": [ - 491042162842, - 523277697206, - 538289244138 - ], - "samples_ts": [ - 1.04268, - 0.978448, - 0.951161 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-11T23:50:35Z", - "avg_ns": 155884800607, - "stddev_ns": 4180179800, - "avg_ts": 0.832168, - "stddev_ts": 0.11509, - "samples_ns": [ - 136737087836, - 180668405266, - 150248908721 - ], - "samples_ts": [ - 0.936103, - 0.70848, - 0.85192 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 670 - }, - { - "timestamp_utc": "2025-12-12T01:06:48.037318+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:58:26Z\",\n \"avg_ns\": 517095396344,\n \"stddev_ns\": 272528241,\n \"avg_ts\": 0.991631,\n \"stddev_ts\": 0.047414,\n \"samples_ns\": [ 537760033568, 523197786668, 490328368797 ],\n \"samples_ts\": [ 0.952098, 0.978597, 1.0442 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:32:47Z\",\n \"avg_ns\": 679642554211,\n \"stddev_ns\": 1833512865,\n \"avg_ts\": 0.753928,\n \"stddev_ts\": 0.025752,\n \"samples_ns\": [ 704764967292, 675694650697, 658468044646 ],\n \"samples_ts\": [ 0.726483, 0.757739, 0.777562 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-11T23:58:26Z", - "avg_ns": 517095396344, - "stddev_ns": 272528241, - "avg_ts": 0.991631, - "stddev_ts": 0.047414, - "samples_ns": [ - 537760033568, - 523197786668, - 490328368797 - ], - "samples_ts": [ - 0.952098, - 0.978597, - 1.0442 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-12T00:32:47Z", - "avg_ns": 679642554211, - "stddev_ns": 1833512865, - "avg_ts": 0.753928, - "stddev_ts": 0.025752, - "samples_ns": [ - 704764967292, - 675694650697, - 658468044646 - ], - "samples_ts": [ - 0.726483, - 0.757739, - 0.777562 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 671 - }, - { - "timestamp_utc": "2025-12-12T01:24:02.303179+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T01:06:49Z\",\n \"avg_ns\": 128736984643,\n \"stddev_ns\": 4124047376,\n \"avg_ts\": 1.001925,\n \"stddev_ts\": 0.104091,\n \"samples_ns\": [ 119391421430, 121749125470, 145070407031 ],\n \"samples_ts\": [ 1.0721, 1.05134, 0.88233 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T01:15:42Z\",\n \"avg_ns\": 166028785437,\n \"stddev_ns\": 3539712165,\n \"avg_ts\": 0.774801,\n \"stddev_ts\": 0.068350,\n \"samples_ns\": [ 150128983929, 176697920786, 171259451597 ],\n \"samples_ts\": [ 0.8526, 0.7244, 0.747404 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T01:06:49Z", - "avg_ns": 128736984643, - "stddev_ns": 4124047376, - "avg_ts": 1.001925, - "stddev_ts": 0.104091, - "samples_ns": [ - 119391421430, - 121749125470, - 145070407031 - ], - "samples_ts": [ - 1.0721, - 1.05134, - 0.88233 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-12T01:15:42Z", - "avg_ns": 166028785437, - "stddev_ns": 3539712165, - "avg_ts": 0.774801, - "stddev_ts": 0.06835, - "samples_ns": [ - 150128983929, - 176697920786, - 171259451597 - ], - "samples_ts": [ - 0.8526, - 0.7244, - 0.747404 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 672 - }, - { - "timestamp_utc": "2025-12-12T02:07:36.604487+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T01:24:04Z\",\n \"avg_ns\": 120953801111,\n \"stddev_ns\": 4162853544,\n \"avg_ts\": 1.093628,\n \"stddev_ts\": 0.238275,\n \"samples_ns\": [ 116584911316, 150016433633, 96260058384 ],\n \"samples_ts\": [ 1.09791, 0.85324, 1.32973 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T01:32:12Z\",\n \"avg_ns\": 707653692988,\n \"stddev_ns\": 3948731739,\n \"avg_ts\": 0.724239,\n \"stddev_ts\": 0.027727,\n \"samples_ns\": [ 739067531014, 696696900428, 687196647524 ],\n \"samples_ts\": [ 0.692765, 0.734896, 0.745056 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T01:24:04Z", - "avg_ns": 120953801111, - "stddev_ns": 4162853544, - "avg_ts": 1.093628, - "stddev_ts": 0.238275, - "samples_ns": [ - 116584911316, - 150016433633, - 96260058384 - ], - "samples_ts": [ - 1.09791, - 0.85324, - 1.32973 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-12T01:32:12Z", - "avg_ns": 707653692988, - "stddev_ns": 3948731739, - "avg_ts": 0.724239, - "stddev_ts": 0.027727, - "samples_ns": [ - 739067531014, - 696696900428, - 687196647524 - ], - "samples_ts": [ - 0.692765, - 0.734896, - 0.745056 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 673 - }, - { - "timestamp_utc": "2025-12-12T02:51:07.825165+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T02:07:38Z\",\n \"avg_ns\": 511538431018,\n \"stddev_ns\": 3960822846,\n \"avg_ts\": 1.002046,\n \"stddev_ts\": 0.041006,\n \"samples_ns\": [ 536115225918, 496927138882, 501572928254 ],\n \"samples_ts\": [ 0.955019, 1.03033, 1.02079 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T02:42:00Z\",\n \"avg_ns\": 182029834398,\n \"stddev_ns\": 1437742509,\n \"avg_ts\": 0.717382,\n \"stddev_ts\": 0.128450,\n \"samples_ns\": [ 207049878900, 148408702166, 190630922130 ],\n \"samples_ts\": [ 0.618209, 0.862483, 0.671455 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T02:07:38Z", - "avg_ns": 511538431018, - "stddev_ns": 3960822846, - "avg_ts": 1.002046, - "stddev_ts": 0.041006, - "samples_ns": [ - 536115225918, - 496927138882, - 501572928254 - ], - "samples_ts": [ - 0.955019, - 1.03033, - 1.02079 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-12T02:42:00Z", - "avg_ns": 182029834398, - "stddev_ns": 1437742509, - "avg_ts": 0.717382, - "stddev_ts": 0.12845, - "samples_ns": [ - 207049878900, - 148408702166, - 190630922130 - ], - "samples_ts": [ - 0.618209, - 0.862483, - 0.671455 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 674 - }, - { - "timestamp_utc": "2025-12-12T04:01:09.202591+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T02:51:08Z\",\n \"avg_ns\": 511891066004,\n \"stddev_ns\": 2169180092,\n \"avg_ts\": 1.001817,\n \"stddev_ts\": 0.049373,\n \"samples_ns\": [ 485402853656, 515314162931, 534956181427 ],\n \"samples_ts\": [ 1.05479, 0.993569, 0.957088 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T03:25:36Z\",\n \"avg_ns\": 710510124922,\n \"stddev_ns\": 4175543284,\n \"avg_ts\": 0.721321,\n \"stddev_ts\": 0.027634,\n \"samples_ns\": [ 685892313582, 740161967125, 705476094060 ],\n \"samples_ts\": [ 0.746473, 0.69174, 0.725751 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T02:51:08Z", - "avg_ns": 511891066004, - "stddev_ns": 2169180092, - "avg_ts": 1.001817, - "stddev_ts": 0.049373, - "samples_ns": [ - 485402853656, - 515314162931, - 534956181427 - ], - "samples_ts": [ - 1.05479, - 0.993569, - 0.957088 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-12T03:25:36Z", - "avg_ns": 710510124922, - "stddev_ns": 4175543284, - "avg_ts": 0.721321, - "stddev_ts": 0.027634, - "samples_ns": [ - 685892313582, - 740161967125, - 705476094060 - ], - "samples_ts": [ - 0.746473, - 0.69174, - 0.725751 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 675 - }, - { - "timestamp_utc": "2025-12-12T04:17:56.978822+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T04:01:10Z\",\n \"avg_ns\": 136994229024,\n \"stddev_ns\": 1664880068,\n \"avg_ts\": 0.986924,\n \"stddev_ts\": 0.298612,\n \"samples_ns\": [ 147824245606, 96454484686, 166703956781 ],\n \"samples_ts\": [ 0.865893, 1.32705, 0.767828 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T04:10:02Z\",\n \"avg_ns\": 157465047232,\n \"stddev_ns\": 1492571004,\n \"avg_ts\": 0.843126,\n \"stddev_ts\": 0.185907,\n \"samples_ns\": [ 128572455035, 201255921247, 142566765415 ],\n \"samples_ts\": [ 0.995548, 0.636006, 0.897825 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T04:01:10Z", - "avg_ns": 136994229024, - "stddev_ns": 1664880068, - "avg_ts": 0.986924, - "stddev_ts": 0.298612, - "samples_ns": [ - 147824245606, - 96454484686, - 166703956781 - ], - "samples_ts": [ - 0.865893, - 1.32705, - 0.767828 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-12T04:10:02Z", - "avg_ns": 157465047232, - "stddev_ns": 1492571004, - "avg_ts": 0.843126, - "stddev_ts": 0.185907, - "samples_ns": [ - 128572455035, - 201255921247, - 142566765415 - ], - "samples_ts": [ - 0.995548, - 0.636006, - 0.897825 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 676 - }, - { - "timestamp_utc": "2025-12-12T05:01:58.747371+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T04:17:58Z\",\n \"avg_ns\": 120861173607,\n \"stddev_ns\": 3621484899,\n \"avg_ts\": 1.115367,\n \"stddev_ts\": 0.286439,\n \"samples_ns\": [ 95957907354, 161801183647, 104824429822 ],\n \"samples_ts\": [ 1.33392, 0.791094, 1.22109 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T04:26:34Z\",\n \"avg_ns\": 707612874408,\n \"stddev_ns\": 4011783685,\n \"avg_ts\": 0.724096,\n \"stddev_ts\": 0.024240,\n \"samples_ns\": [ 711807114850, 728707482140, 682324026236 ],\n \"samples_ts\": [ 0.719296, 0.702614, 0.750377 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T04:17:58Z", - "avg_ns": 120861173607, - "stddev_ns": 3621484899, - "avg_ts": 1.115367, - "stddev_ts": 0.286439, - "samples_ns": [ - 95957907354, - 161801183647, - 104824429822 - ], - "samples_ts": [ - 1.33392, - 0.791094, - 1.22109 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-12T04:26:34Z", - "avg_ns": 707612874408, - "stddev_ns": 4011783685, - "avg_ts": 0.724096, - "stddev_ts": 0.02424, - "samples_ns": [ - 711807114850, - 728707482140, - 682324026236 - ], - "samples_ts": [ - 0.719296, - 0.702614, - 0.750377 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 677 - }, - { - "timestamp_utc": "2025-12-12T05:45:14.845435+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T05:01:59Z\",\n \"avg_ns\": 510926114847,\n \"stddev_ns\": 659437316,\n \"avg_ts\": 1.003411,\n \"stddev_ts\": 0.044944,\n \"samples_ns\": [ 521757956180, 485249385793, 525771002570 ],\n \"samples_ts\": [ 0.981298, 1.05513, 0.973808 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T05:36:28Z\",\n \"avg_ns\": 175129717912,\n \"stddev_ns\": 3916820766,\n \"avg_ts\": 0.747180,\n \"stddev_ts\": 0.140790,\n \"samples_ns\": [ 184013228532, 141204615681, 200171309524 ],\n \"samples_ts\": [ 0.695602, 0.906486, 0.639452 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T05:01:59Z", - "avg_ns": 510926114847, - "stddev_ns": 659437316, - "avg_ts": 1.003411, - "stddev_ts": 0.044944, - "samples_ns": [ - 521757956180, - 485249385793, - 525771002570 - ], - "samples_ts": [ - 0.981298, - 1.05513, - 0.973808 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-12T05:36:28Z", - "avg_ns": 175129717912, - "stddev_ns": 3916820766, - "avg_ts": 0.74718, - "stddev_ts": 0.14079, - "samples_ns": [ - 184013228532, - 141204615681, - 200171309524 - ], - "samples_ts": [ - 0.695602, - 0.906486, - 0.639452 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 678 - }, - { - "timestamp_utc": "2025-12-12T06:53:46.889766+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T05:45:15Z\",\n \"avg_ns\": 531768462785,\n \"stddev_ns\": 2066691201,\n \"avg_ts\": 0.964380,\n \"stddev_ts\": 0.047349,\n \"samples_ns\": [ 506486469187, 558811415788, 530007503381 ],\n \"samples_ts\": [ 1.01089, 0.91623, 0.966024 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T06:20:00Z\",\n \"avg_ns\": 674797894294,\n \"stddev_ns\": 2395400130,\n \"avg_ts\": 0.759325,\n \"stddev_ts\": 0.025440,\n \"samples_ns\": [ 661487146945, 661488595378, 701417940559 ],\n \"samples_ts\": [ 0.774014, 0.774012, 0.72995 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T05:45:15Z", - "avg_ns": 531768462785, - "stddev_ns": 2066691201, - "avg_ts": 0.96438, - "stddev_ts": 0.047349, - "samples_ns": [ - 506486469187, - 558811415788, - 530007503381 - ], - "samples_ts": [ - 1.01089, - 0.91623, - 0.966024 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-12T06:20:00Z", - "avg_ns": 674797894294, - "stddev_ns": 2395400130, - "avg_ts": 0.759325, - "stddev_ts": 0.02544, - "samples_ns": [ - 661487146945, - 661488595378, - 701417940559 - ], - "samples_ts": [ - 0.774014, - 0.774012, - 0.72995 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 679 - }, - { - "timestamp_utc": "2025-12-12T07:10:47.418741+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T06:53:48Z\",\n \"avg_ns\": 120981850861,\n \"stddev_ns\": 2112998401,\n \"avg_ts\": 1.100027,\n \"stddev_ts\": 0.253261,\n \"samples_ns\": [ 111599222153, 154854551450, 96491778982 ],\n \"samples_ts\": [ 1.14696, 0.826582, 1.32654 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T07:02:01Z\",\n \"avg_ns\": 174879607259,\n \"stddev_ns\": 3959774283,\n \"avg_ts\": 0.752647,\n \"stddev_ts\": 0.161131,\n \"samples_ns\": [ 200571834404, 136619652994, 187447334381 ],\n \"samples_ts\": [ 0.638175, 0.936908, 0.682858 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T06:53:48Z", - "avg_ns": 120981850861, - "stddev_ns": 2112998401, - "avg_ts": 1.100027, - "stddev_ts": 0.253261, - "samples_ns": [ - 111599222153, - 154854551450, - 96491778982 - ], - "samples_ts": [ - 1.14696, - 0.826582, - 1.32654 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-12T07:02:01Z", - "avg_ns": 174879607259, - "stddev_ns": 3959774283, - "avg_ts": 0.752647, - "stddev_ts": 0.161131, - "samples_ns": [ - 200571834404, - 136619652994, - 187447334381 - ], - "samples_ts": [ - 0.638175, - 0.936908, - 0.682858 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 680 - }, - { - "timestamp_utc": "2025-12-12T07:54:01.769980+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T07:10:48Z\",\n \"avg_ns\": 136974587914,\n \"stddev_ns\": 4042742778,\n \"avg_ts\": 0.985817,\n \"stddev_ts\": 0.297435,\n \"samples_ns\": [ 162402068996, 96393637280, 152128057467 ],\n \"samples_ts\": [ 0.788167, 1.32789, 0.841396 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T07:19:23Z\",\n \"avg_ns\": 691970265169,\n \"stddev_ns\": 2651482420,\n \"avg_ts\": 0.740704,\n \"stddev_ts\": 0.029407,\n \"samples_ns\": [ 685997787464, 667646846940, 722266161104 ],\n \"samples_ts\": [ 0.746358, 0.766872, 0.70888 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T07:10:48Z", - "avg_ns": 136974587914, - "stddev_ns": 4042742778, - "avg_ts": 0.985817, - "stddev_ts": 0.297435, - "samples_ns": [ - 162402068996, - 96393637280, - 152128057467 - ], - "samples_ts": [ - 0.788167, - 1.32789, - 0.841396 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-12T07:19:23Z", - "avg_ns": 691970265169, - "stddev_ns": 2651482420, - "avg_ts": 0.740704, - "stddev_ts": 0.029407, - "samples_ns": [ - 685997787464, - 667646846940, - 722266161104 - ], - "samples_ts": [ - 0.746358, - 0.766872, - 0.70888 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 681 - }, - { - "timestamp_utc": "2025-12-12T08:36:40.048695+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T07:54:03Z\",\n \"avg_ns\": 522426510775,\n \"stddev_ns\": 4199555837,\n \"avg_ts\": 0.981702,\n \"stddev_ts\": 0.050167,\n \"samples_ns\": [ 537057622760, 537737125590, 492484783975 ],\n \"samples_ts\": [ 0.953343, 0.952138, 1.03963 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T08:28:22Z\",\n \"avg_ns\": 165348333527,\n \"stddev_ns\": 554463871,\n \"avg_ts\": 0.777954,\n \"stddev_ts\": 0.068243,\n \"samples_ns\": [ 170165654917, 176228514152, 149650831514 ],\n \"samples_ts\": [ 0.752208, 0.72633, 0.855324 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T07:54:03Z", - "avg_ns": 522426510775, - "stddev_ns": 4199555837, - "avg_ts": 0.981702, - "stddev_ts": 0.050167, - "samples_ns": [ - 537057622760, - 537737125590, - 492484783975 - ], - "samples_ts": [ - 0.953343, - 0.952138, - 1.03963 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-12T08:28:22Z", - "avg_ns": 165348333527, - "stddev_ns": 554463871, - "avg_ts": 0.777954, - "stddev_ts": 0.068243, - "samples_ns": [ - 170165654917, - 176228514152, - 149650831514 - ], - "samples_ts": [ - 0.752208, - 0.72633, - 0.855324 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 682 - }, - { - "timestamp_utc": "2025-12-12T09:45:44.868279+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "1", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T08:36:41Z\",\n \"avg_ns\": 514144544415,\n \"stddev_ns\": 4144224253,\n \"avg_ts\": 0.996879,\n \"stddev_ts\": 0.039830,\n \"samples_ns\": [ 517350829534, 492390238265, 532692565446 ],\n \"samples_ts\": [ 0.989657, 1.03983, 0.961155 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T09:11:22Z\",\n \"avg_ns\": 687045389834,\n \"stddev_ns\": 3578517202,\n \"avg_ts\": 0.745693,\n \"stddev_ts\": 0.022830,\n \"samples_ns\": [ 670741124028, 711202391679, 679192653796 ],\n \"samples_ts\": [ 0.763335, 0.719908, 0.753836 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T08:36:41Z", - "avg_ns": 514144544415, - "stddev_ns": 4144224253, - "avg_ts": 0.996879, - "stddev_ts": 0.03983, - "samples_ns": [ - 517350829534, - 492390238265, - 532692565446 - ], - "samples_ts": [ - 0.989657, - 1.03983, - 0.961155 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 1, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-12T09:11:22Z", - "avg_ns": 687045389834, - "stddev_ns": 3578517202, - "avg_ts": 0.745693, - "stddev_ts": 0.02283, - "samples_ns": [ - 670741124028, - 711202391679, - 679192653796 - ], - "samples_ts": [ - 0.763335, - 0.719908, - 0.753836 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 1, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 683 - }, - { - "timestamp_utc": "2025-12-12T10:00:32.141721+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T09:45:45Z\",\n \"avg_ns\": 112835325904,\n \"stddev_ns\": 839118946,\n \"avg_ts\": 1.263564,\n \"stddev_ts\": 0.475573,\n \"samples_ns\": [ 165096514562, 74188096354, 99221366798 ],\n \"samples_ts\": [ 0.775304, 1.72534, 1.29004 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T09:52:22Z\",\n \"avg_ns\": 162459664787,\n \"stddev_ns\": 728280103,\n \"avg_ts\": 0.807432,\n \"stddev_ts\": 0.161860,\n \"samples_ns\": [ 184005173309, 128897205392, 174476615661 ],\n \"samples_ts\": [ 0.695633, 0.993039, 0.733623 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T09:45:45Z", - "avg_ns": 112835325904, - "stddev_ns": 839118946, - "avg_ts": 1.263564, - "stddev_ts": 0.475573, - "samples_ns": [ - 165096514562, - 74188096354, - 99221366798 - ], - "samples_ts": [ - 0.775304, - 1.72534, - 1.29004 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-12T09:52:22Z", - "avg_ns": 162459664787, - "stddev_ns": 728280103, - "avg_ts": 0.807432, - "stddev_ts": 0.16186, - "samples_ns": [ - 184005173309, - 128897205392, - 174476615661 - ], - "samples_ts": [ - 0.695633, - 0.993039, - 0.733623 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 684 - }, - { - "timestamp_utc": "2025-12-12T10:39:30.484597+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T10:00:33Z\",\n \"avg_ns\": 112680469730,\n \"stddev_ns\": 3789003014,\n \"avg_ts\": 1.276669,\n \"stddev_ts\": 0.511374,\n \"samples_ns\": [ 164920765344, 71181124921, 101939518926 ],\n \"samples_ts\": [ 0.77613, 1.79823, 1.25565 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T10:07:26Z\",\n \"avg_ns\": 640470180530,\n \"stddev_ns\": 2330851669,\n \"avg_ts\": 0.799420,\n \"stddev_ts\": 0.002904,\n \"samples_ns\": [ 643138685738, 639439538649, 638832317203 ],\n \"samples_ts\": [ 0.796096, 0.800701, 0.801462 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T10:00:33Z", - "avg_ns": 112680469730, - "stddev_ns": 3789003014, - "avg_ts": 1.276669, - "stddev_ts": 0.511374, - "samples_ns": [ - 164920765344, - 71181124921, - 101939518926 - ], - "samples_ts": [ - 0.77613, - 1.79823, - 1.25565 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-12T10:07:26Z", - "avg_ns": 640470180530, - "stddev_ns": 2330851669, - "avg_ts": 0.79942, - "stddev_ts": 0.002904, - "samples_ns": [ - 643138685738, - 639439538649, - 638832317203 - ], - "samples_ts": [ - 0.796096, - 0.800701, - 0.801462 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 685 - }, - { - "timestamp_utc": "2025-12-12T11:15:45.878485+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T10:39:32Z\",\n \"avg_ns\": 443152887286,\n \"stddev_ns\": 2152969265,\n \"avg_ts\": 1.158560,\n \"stddev_ts\": 0.074554,\n \"samples_ns\": [ 472289959109, 415184153628, 441984549122 ],\n \"samples_ts\": [ 1.08408, 1.23319, 1.15841 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T11:08:06Z\",\n \"avg_ns\": 152444009989,\n \"stddev_ns\": 3947046676,\n \"avg_ts\": 0.845189,\n \"stddev_ts\": 0.084038,\n \"samples_ns\": [ 152398069894, 167557213672, 137376746403 ],\n \"samples_ts\": [ 0.839906, 0.763918, 0.931744 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T10:39:32Z", - "avg_ns": 443152887286, - "stddev_ns": 2152969265, - "avg_ts": 1.15856, - "stddev_ts": 0.074554, - "samples_ns": [ - 472289959109, - 415184153628, - 441984549122 - ], - "samples_ts": [ - 1.08408, - 1.23319, - 1.15841 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-12T11:08:06Z", - "avg_ns": 152444009989, - "stddev_ns": 3947046676, - "avg_ts": 0.845189, - "stddev_ts": 0.084038, - "samples_ns": [ - 152398069894, - 167557213672, - 137376746403 - ], - "samples_ts": [ - 0.839906, - 0.763918, - 0.931744 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 686 - }, - { - "timestamp_utc": "2025-12-12T12:16:04.241376+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T11:15:47Z\",\n \"avg_ns\": 442070233615,\n \"stddev_ns\": 1697160734,\n \"avg_ts\": 1.160264,\n \"stddev_ts\": 0.059923,\n \"samples_ns\": [ 439596218760, 420415714268, 466198767818 ],\n \"samples_ts\": [ 1.16471, 1.21784, 1.09824 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T11:45:22Z\",\n \"avg_ns\": 613760957086,\n \"stddev_ns\": 1785711423,\n \"avg_ts\": 0.834260,\n \"stddev_ts\": 0.008555,\n \"samples_ns\": [ 610240799523, 609972286437, 621069785300 ],\n \"samples_ts\": [ 0.839013, 0.839382, 0.824384 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T11:15:47Z", - "avg_ns": 442070233615, - "stddev_ns": 1697160734, - "avg_ts": 1.160264, - "stddev_ts": 0.059923, - "samples_ns": [ - 439596218760, - 420415714268, - 466198767818 - ], - "samples_ts": [ - 1.16471, - 1.21784, - 1.09824 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-12T11:45:22Z", - "avg_ns": 613760957086, - "stddev_ns": 1785711423, - "avg_ts": 0.83426, - "stddev_ts": 0.008555, - "samples_ns": [ - 610240799523, - 609972286437, - 621069785300 - ], - "samples_ts": [ - 0.839013, - 0.839382, - 0.824384 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 687 - }, - { - "timestamp_utc": "2025-12-12T12:31:00.305318+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T12:16:05Z\",\n \"avg_ns\": 112522524880,\n \"stddev_ns\": 4175356644,\n \"avg_ts\": 1.334911,\n \"stddev_ts\": 0.668356,\n \"samples_ns\": [ 111011521023, 61675420012, 164880633605 ],\n \"samples_ts\": [ 1.15303, 2.07538, 0.776319 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T12:24:14Z\",\n \"avg_ns\": 134470271602,\n \"stddev_ns\": 3621651075,\n \"avg_ts\": 1.086117,\n \"stddev_ts\": 0.415590,\n \"samples_ns\": [ 100765010271, 209744280071, 92901524464 ],\n \"samples_ts\": [ 1.27028, 0.610267, 1.3778 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T12:16:05Z", - "avg_ns": 112522524880, - "stddev_ns": 4175356644, - "avg_ts": 1.334911, - "stddev_ts": 0.668356, - "samples_ns": [ - 111011521023, - 61675420012, - 164880633605 - ], - "samples_ts": [ - 1.15303, - 2.07538, - 0.776319 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-12T12:24:14Z", - "avg_ns": 134470271602, - "stddev_ns": 3621651075, - "avg_ts": 1.086117, - "stddev_ts": 0.41559, - "samples_ns": [ - 100765010271, - 209744280071, - 92901524464 - ], - "samples_ts": [ - 1.27028, - 0.610267, - 1.3778 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 688 - }, - { - "timestamp_utc": "2025-12-12T13:10:21.458316+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T12:31:01Z\",\n \"avg_ns\": 113497296436,\n \"stddev_ns\": 4087263520,\n \"avg_ts\": 1.426589,\n \"stddev_ts\": 0.903181,\n \"samples_ns\": [ 120525511378, 52136930121, 167829447811 ],\n \"samples_ts\": [ 1.06202, 2.45507, 0.762679 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T12:39:04Z\",\n \"avg_ns\": 624976989004,\n \"stddev_ns\": 3946718053,\n \"avg_ts\": 0.819548,\n \"stddev_ts\": 0.019761,\n \"samples_ns\": [ 640398134700, 624291751599, 610241080715 ],\n \"samples_ts\": [ 0.799503, 0.820129, 0.839013 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T12:31:01Z", - "avg_ns": 113497296436, - "stddev_ns": 4087263520, - "avg_ts": 1.426589, - "stddev_ts": 0.903181, - "samples_ns": [ - 120525511378, - 52136930121, - 167829447811 - ], - "samples_ts": [ - 1.06202, - 2.45507, - 0.762679 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-12T12:39:04Z", - "avg_ns": 624976989004, - "stddev_ns": 3946718053, - "avg_ts": 0.819548, - "stddev_ts": 0.019761, - "samples_ns": [ - 640398134700, - 624291751599, - 610241080715 - ], - "samples_ts": [ - 0.799503, - 0.820129, - 0.839013 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 689 - }, - { - "timestamp_utc": "2025-12-12T13:46:06.463186+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T13:10:22Z\",\n \"avg_ns\": 452041950931,\n \"stddev_ns\": 1001094681,\n \"avg_ts\": 1.158818,\n \"stddev_ts\": 0.224959,\n \"samples_ns\": [ 497664147277, 360924632581, 497537072937 ],\n \"samples_ts\": [ 1.02881, 1.41858, 1.02907 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T13:39:27Z\",\n \"avg_ns\": 132378052023,\n \"stddev_ns\": 4191672298,\n \"avg_ts\": 1.189930,\n \"stddev_ts\": 0.551556,\n \"samples_ns\": [ 92455926626, 225562122162, 79116107281 ],\n \"samples_ts\": [ 1.38444, 0.567471, 1.61788 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T13:10:22Z", - "avg_ns": 452041950931, - "stddev_ns": 1001094681, - "avg_ts": 1.158818, - "stddev_ts": 0.224959, - "samples_ns": [ - 497664147277, - 360924632581, - 497537072937 - ], - "samples_ts": [ - 1.02881, - 1.41858, - 1.02907 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-12T13:39:27Z", - "avg_ns": 132378052023, - "stddev_ns": 4191672298, - "avg_ts": 1.18993, - "stddev_ts": 0.551556, - "samples_ns": [ - 92455926626, - 225562122162, - 79116107281 - ], - "samples_ts": [ - 1.38444, - 0.567471, - 1.61788 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 690 - }, - { - "timestamp_utc": "2025-12-12T14:46:13.883522+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T13:46:07Z\",\n \"avg_ns\": 425569694516,\n \"stddev_ns\": 3566944011,\n \"avg_ts\": 1.208679,\n \"stddev_ts\": 0.100661,\n \"samples_ns\": [ 390707694534, 461611418073, 424389970943 ],\n \"samples_ts\": [ 1.31044, 1.10916, 1.20644 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T14:15:39Z\",\n \"avg_ns\": 611205759493,\n \"stddev_ns\": 1438211357,\n \"avg_ts\": 0.837692,\n \"stddev_ts\": 0.001968,\n \"samples_ns\": [ 610396314221, 610354675348, 612866288910 ],\n \"samples_ts\": [ 0.838799, 0.838857, 0.835419 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T13:46:07Z", - "avg_ns": 425569694516, - "stddev_ns": 3566944011, - "avg_ts": 1.208679, - "stddev_ts": 0.100661, - "samples_ns": [ - 390707694534, - 461611418073, - 424389970943 - ], - "samples_ts": [ - 1.31044, - 1.10916, - 1.20644 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-12T14:15:39Z", - "avg_ns": 611205759493, - "stddev_ns": 1438211357, - "avg_ts": 0.837692, - "stddev_ts": 0.001968, - "samples_ns": [ - 610396314221, - 610354675348, - 612866288910 - ], - "samples_ts": [ - 0.838799, - 0.838857, - 0.835419 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 691 - }, - { - "timestamp_utc": "2025-12-12T15:01:08.600104+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T14:46:15Z\",\n \"avg_ns\": 114098544412,\n \"stddev_ns\": 1685025125,\n \"avg_ts\": 1.232547,\n \"stddev_ts\": 0.417200,\n \"samples_ns\": [ 94857234665, 81070943036, 166367455537 ],\n \"samples_ts\": [ 1.3494, 1.57886, 0.769381 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T14:54:43Z\",\n \"avg_ns\": 127700922627,\n \"stddev_ns\": 2598200977,\n \"avg_ts\": 1.311468,\n \"stddev_ts\": 0.662199,\n \"samples_ns\": [ 79838506237, 231260305857, 72003955788 ],\n \"samples_ts\": [ 1.60324, 0.553489, 1.77768 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T14:46:15Z", - "avg_ns": 114098544412, - "stddev_ns": 1685025125, - "avg_ts": 1.232547, - "stddev_ts": 0.4172, - "samples_ns": [ - 94857234665, - 81070943036, - 166367455537 - ], - "samples_ts": [ - 1.3494, - 1.57886, - 0.769381 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-12T14:54:43Z", - "avg_ns": 127700922627, - "stddev_ns": 2598200977, - "avg_ts": 1.311468, - "stddev_ts": 0.662199, - "samples_ns": [ - 79838506237, - 231260305857, - 72003955788 - ], - "samples_ts": [ - 1.60324, - 0.553489, - 1.77768 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 692 - }, - { - "timestamp_utc": "2025-12-12T15:41:07.316888+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T15:01:09Z\",\n \"avg_ns\": 113372320284,\n \"stddev_ns\": 2118000291,\n \"avg_ts\": 1.269549,\n \"stddev_ts\": 0.511031,\n \"samples_ns\": [ 102930659548, 71385548246, 165800753060 ],\n \"samples_ts\": [ 1.24356, 1.79308, 0.772011 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T15:09:29Z\",\n \"avg_ns\": 631794944944,\n \"stddev_ns\": 1127710329,\n \"avg_ts\": 0.810675,\n \"stddev_ts\": 0.018498,\n \"samples_ns\": [ 648663425431, 623352832048, 623368577353 ],\n \"samples_ts\": [ 0.789315, 0.821365, 0.821344 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T15:01:09Z", - "avg_ns": 113372320284, - "stddev_ns": 2118000291, - "avg_ts": 1.269549, - "stddev_ts": 0.511031, - "samples_ns": [ - 102930659548, - 71385548246, - 165800753060 - ], - "samples_ts": [ - 1.24356, - 1.79308, - 0.772011 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-12T15:09:29Z", - "avg_ns": 631794944944, - "stddev_ns": 1127710329, - "avg_ts": 0.810675, - "stddev_ts": 0.018498, - "samples_ns": [ - 648663425431, - 623352832048, - 623368577353 - ], - "samples_ts": [ - 0.789315, - 0.821365, - 0.821344 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 693 - }, - { - "timestamp_utc": "2025-12-12T16:18:40.327635+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T15:41:08Z\",\n \"avg_ns\": 425407919380,\n \"stddev_ns\": 3706919085,\n \"avg_ts\": 1.211384,\n \"stddev_ts\": 0.117876,\n \"samples_ns\": [ 388022343452, 471574594179, 416626820511 ],\n \"samples_ts\": [ 1.31951, 1.08572, 1.22892 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T16:10:44Z\",\n \"avg_ns\": 158365553423,\n \"stddev_ns\": 3953611026,\n \"avg_ts\": 0.821615,\n \"stddev_ts\": 0.134133,\n \"samples_ns\": [ 170372984857, 131109162978, 173614512434 ],\n \"samples_ts\": [ 0.751293, 0.976286, 0.737266 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T15:41:08Z", - "avg_ns": 425407919380, - "stddev_ns": 3706919085, - "avg_ts": 1.211384, - "stddev_ts": 0.117876, - "samples_ns": [ - 388022343452, - 471574594179, - 416626820511 - ], - "samples_ts": [ - 1.31951, - 1.08572, - 1.22892 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-12T16:10:44Z", - "avg_ns": 158365553423, - "stddev_ns": 3953611026, - "avg_ts": 0.821615, - "stddev_ts": 0.134133, - "samples_ns": [ - 170372984857, - 131109162978, - 173614512434 - ], - "samples_ts": [ - 0.751293, - 0.976286, - 0.737266 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 694 - }, - { - "timestamp_utc": "2025-12-12T17:19:39.654331+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "512", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T16:18:42Z\",\n \"avg_ns\": 432201643323,\n \"stddev_ns\": 2339400869,\n \"avg_ts\": 1.186529,\n \"stddev_ts\": 0.058700,\n \"samples_ns\": [ 438365013904, 449380286980, 408859629086 ],\n \"samples_ts\": [ 1.16798, 1.13935, 1.25226 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T16:47:19Z\",\n \"avg_ns\": 645714671136,\n \"stddev_ns\": 2104104935,\n \"avg_ts\": 0.792925,\n \"stddev_ts\": 0.002579,\n \"samples_ns\": [ 644796626623, 648121808378, 644225578409 ],\n \"samples_ts\": [ 0.794049, 0.789975, 0.794753 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T16:18:42Z", - "avg_ns": 432201643323, - "stddev_ns": 2339400869, - "avg_ts": 1.186529, - "stddev_ts": 0.0587, - "samples_ns": [ - 438365013904, - 449380286980, - 408859629086 - ], - "samples_ts": [ - 1.16798, - 1.13935, - 1.25226 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 512, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-12T16:47:19Z", - "avg_ns": 645714671136, - "stddev_ns": 2104104935, - "avg_ts": 0.792925, - "stddev_ts": 0.002579, - "samples_ns": [ - 644796626623, - 648121808378, - 644225578409 - ], - "samples_ts": [ - 0.794049, - 0.789975, - 0.794753 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 512, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 695 - }, - { - "timestamp_utc": "2025-12-12T17:34:45.300793+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T17:19:41Z\",\n \"avg_ns\": 103701817716,\n \"stddev_ns\": 4174022432,\n \"avg_ts\": 1.553188,\n \"stddev_ts\": 0.960636,\n \"samples_ns\": [ 107076363571, 155569443022, 48459646557 ],\n \"samples_ts\": [ 1.19541, 0.822784, 2.64137 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T17:25:58Z\",\n \"avg_ns\": 175231506007,\n \"stddev_ns\": 3435356769,\n \"avg_ts\": 0.870266,\n \"stddev_ts\": 0.491682,\n \"samples_ns\": [ 214121520295, 89021112249, 222551885478 ],\n \"samples_ts\": [ 0.597791, 1.43786, 0.575147 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T17:19:41Z", - "avg_ns": 103701817716, - "stddev_ns": 4174022432, - "avg_ts": 1.553188, - "stddev_ts": 0.960636, - "samples_ns": [ - 107076363571, - 155569443022, - 48459646557 - ], - "samples_ts": [ - 1.19541, - 0.822784, - 2.64137 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-12T17:25:58Z", - "avg_ns": 175231506007, - "stddev_ns": 3435356769, - "avg_ts": 0.870266, - "stddev_ts": 0.491682, - "samples_ns": [ - 214121520295, - 89021112249, - 222551885478 - ], - "samples_ts": [ - 0.597791, - 1.43786, - 0.575147 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 696 - }, - { - "timestamp_utc": "2025-12-12T18:15:09.633939+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T17:34:47Z\",\n \"avg_ns\": 103795639104,\n \"stddev_ns\": 1479619400,\n \"avg_ts\": 1.543416,\n \"stddev_ts\": 0.958557,\n \"samples_ns\": [ 111488016589, 151348001243, 48550899481 ],\n \"samples_ts\": [ 1.14811, 0.845733, 2.63641 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T17:41:00Z\",\n \"avg_ns\": 682710691695,\n \"stddev_ns\": 2686221414,\n \"avg_ts\": 0.750149,\n \"stddev_ts\": 0.014974,\n \"samples_ns\": [ 667539762513, 693433357846, 687158954728 ],\n \"samples_ts\": [ 0.766996, 0.738355, 0.745097 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T17:34:47Z", - "avg_ns": 103795639104, - "stddev_ns": 1479619400, - "avg_ts": 1.543416, - "stddev_ts": 0.958557, - "samples_ns": [ - 111488016589, - 151348001243, - 48550899481 - ], - "samples_ts": [ - 1.14811, - 0.845733, - 2.63641 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-12T17:41:00Z", - "avg_ns": 682710691695, - "stddev_ns": 2686221414, - "avg_ts": 0.750149, - "stddev_ts": 0.014974, - "samples_ns": [ - 667539762513, - 693433357846, - 687158954728 - ], - "samples_ts": [ - 0.766996, - 0.738355, - 0.745097 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 697 - }, - { - "timestamp_utc": "2025-12-12T18:50:56.564242+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T18:15:10Z\",\n \"avg_ns\": 452697442879,\n \"stddev_ns\": 740402504,\n \"avg_ts\": 1.146984,\n \"stddev_ts\": 0.171581,\n \"samples_ns\": [ 503276983504, 381647454215, 473167890920 ],\n \"samples_ts\": [ 1.01733, 1.34155, 1.08207 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T18:43:51Z\",\n \"avg_ns\": 140943335197,\n \"stddev_ns\": 1148408994,\n \"avg_ts\": 0.971508,\n \"stddev_ts\": 0.280891,\n \"samples_ns\": [ 119164610929, 195798371272, 107867023392 ],\n \"samples_ts\": [ 1.07414, 0.653734, 1.18665 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T18:15:10Z", - "avg_ns": 452697442879, - "stddev_ns": 740402504, - "avg_ts": 1.146984, - "stddev_ts": 0.171581, - "samples_ns": [ - 503276983504, - 381647454215, - 473167890920 - ], - "samples_ts": [ - 1.01733, - 1.34155, - 1.08207 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-12T18:43:51Z", - "avg_ns": 140943335197, - "stddev_ns": 1148408994, - "avg_ts": 0.971508, - "stddev_ts": 0.280891, - "samples_ns": [ - 119164610929, - 195798371272, - 107867023392 - ], - "samples_ts": [ - 1.07414, - 0.653734, - 1.18665 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 698 - }, - { - "timestamp_utc": "2025-12-12T19:50:56.042050+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T18:50:57Z\",\n \"avg_ns\": 433817179352,\n \"stddev_ns\": 3690976331,\n \"avg_ts\": 1.181421,\n \"stddev_ts\": 0.046605,\n \"samples_ns\": [ 414669096454, 440918512679, 445863928925 ],\n \"samples_ts\": [ 1.23472, 1.16121, 1.14833 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T19:20:31Z\",\n \"avg_ns\": 607936037427,\n \"stddev_ns\": 523956281,\n \"avg_ts\": 0.842194,\n \"stddev_ts\": 0.000726,\n \"samples_ns\": [ 608006633762, 608421116337, 607380362182 ],\n \"samples_ts\": [ 0.842096, 0.841522, 0.842964 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T18:50:57Z", - "avg_ns": 433817179352, - "stddev_ns": 3690976331, - "avg_ts": 1.181421, - "stddev_ts": 0.046605, - "samples_ns": [ - 414669096454, - 440918512679, - 445863928925 - ], - "samples_ts": [ - 1.23472, - 1.16121, - 1.14833 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-12T19:20:31Z", - "avg_ns": 607936037427, - "stddev_ns": 523956281, - "avg_ts": 0.842194, - "stddev_ts": 0.000726, - "samples_ns": [ - 608006633762, - 608421116337, - 607380362182 - ], - "samples_ts": [ - 0.842096, - 0.841522, - 0.842964 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 699 - }, - { - "timestamp_utc": "2025-12-12T20:05:53.464204+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T19:50:57Z\",\n \"avg_ns\": 112620680082,\n \"stddev_ns\": 3712453011,\n \"avg_ts\": 1.466233,\n \"stddev_ts\": 0.997309,\n \"samples_ns\": [ 132744088650, 48952210960, 156165740638 ],\n \"samples_ts\": [ 0.964261, 2.6148, 0.819642 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T19:58:45Z\",\n \"avg_ns\": 141986215092,\n \"stddev_ns\": 1095826485,\n \"avg_ts\": 0.944271,\n \"stddev_ts\": 0.229652,\n \"samples_ns\": [ 123000429239, 187646115161, 115312100876 ],\n \"samples_ts\": [ 1.04065, 0.682135, 1.11003 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T19:50:57Z", - "avg_ns": 112620680082, - "stddev_ns": 3712453011, - "avg_ts": 1.466233, - "stddev_ts": 0.997309, - "samples_ns": [ - 132744088650, - 48952210960, - 156165740638 - ], - "samples_ts": [ - 0.964261, - 2.6148, - 0.819642 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-12T19:58:45Z", - "avg_ns": 141986215092, - "stddev_ns": 1095826485, - "avg_ts": 0.944271, - "stddev_ts": 0.229652, - "samples_ns": [ - 123000429239, - 187646115161, - 115312100876 - ], - "samples_ts": [ - 1.04065, - 0.682135, - 1.11003 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 700 - }, - { - "timestamp_utc": "2025-12-12T20:45:56.110824+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T20:05:54Z\",\n \"avg_ns\": 112629767845,\n \"stddev_ns\": 2292593942,\n \"avg_ts\": 1.471362,\n \"stddev_ts\": 1.014805,\n \"samples_ns\": [ 139026413202, 48439597087, 150423293248 ],\n \"samples_ts\": [ 0.920688, 2.64247, 0.850932 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T20:13:36Z\",\n \"avg_ns\": 645774358168,\n \"stddev_ns\": 3797116729,\n \"avg_ts\": 0.794598,\n \"stddev_ts\": 0.044942,\n \"samples_ns\": [ 689372000544, 623807605713, 624143468248 ],\n \"samples_ts\": [ 0.742705, 0.820766, 0.820324 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T20:05:54Z", - "avg_ns": 112629767845, - "stddev_ns": 2292593942, - "avg_ts": 1.471362, - "stddev_ts": 1.014805, - "samples_ns": [ - 139026413202, - 48439597087, - 150423293248 - ], - "samples_ts": [ - 0.920688, - 2.64247, - 0.850932 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-12T20:13:36Z", - "avg_ns": 645774358168, - "stddev_ns": 3797116729, - "avg_ts": 0.794598, - "stddev_ts": 0.044942, - "samples_ns": [ - 689372000544, - 623807605713, - 624143468248 - ], - "samples_ts": [ - 0.742705, - 0.820766, - 0.820324 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 701 - }, - { - "timestamp_utc": "2025-12-12T21:23:03.468463+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T20:45:57Z\",\n \"avg_ns\": 434139767515,\n \"stddev_ns\": 2601199422,\n \"avg_ts\": 1.180571,\n \"stddev_ts\": 0.046710,\n \"samples_ns\": [ 416580766717, 435080142337, 450758393491 ],\n \"samples_ts\": [ 1.22905, 1.17679, 1.13586 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T21:15:29Z\",\n \"avg_ns\": 151178279340,\n \"stddev_ns\": 4288946734,\n \"avg_ts\": 0.856581,\n \"stddev_ts\": 0.111725,\n \"samples_ns\": [ 132729621636, 172670939824, 148134276562 ],\n \"samples_ts\": [ 0.964366, 0.741294, 0.864081 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T20:45:57Z", - "avg_ns": 434139767515, - "stddev_ns": 2601199422, - "avg_ts": 1.180571, - "stddev_ts": 0.04671, - "samples_ns": [ - 416580766717, - 435080142337, - 450758393491 - ], - "samples_ts": [ - 1.22905, - 1.17679, - 1.13586 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-12T21:15:29Z", - "avg_ns": 151178279340, - "stddev_ns": 4288946734, - "avg_ts": 0.856581, - "stddev_ts": 0.111725, - "samples_ns": [ - 132729621636, - 172670939824, - 148134276562 - ], - "samples_ts": [ - 0.964366, - 0.741294, - 0.864081 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 702 - }, - { - "timestamp_utc": "2025-12-12T22:24:15.490788+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T21:23:05Z\",\n \"avg_ns\": 420290636941,\n \"stddev_ns\": 3803427455,\n \"avg_ts\": 1.230745,\n \"stddev_ts\": 0.149697,\n \"samples_ns\": [ 408204033893, 478202618175, 374465258757 ],\n \"samples_ts\": [ 1.25427, 1.07068, 1.36728 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T21:51:32Z\",\n \"avg_ns\": 653561124609,\n \"stddev_ns\": 371032783,\n \"avg_ts\": 0.783400,\n \"stddev_ts\": 0.000445,\n \"samples_ns\": [ 653895887710, 653162192683, 653625293434 ],\n \"samples_ts\": [ 0.782999, 0.783879, 0.783323 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T21:23:05Z", - "avg_ns": 420290636941, - "stddev_ns": 3803427455, - "avg_ts": 1.230745, - "stddev_ts": 0.149697, - "samples_ns": [ - 408204033893, - 478202618175, - 374465258757 - ], - "samples_ts": [ - 1.25427, - 1.07068, - 1.36728 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-12T21:51:32Z", - "avg_ns": 653561124609, - "stddev_ns": 371032783, - "avg_ts": 0.7834, - "stddev_ts": 0.000445, - "samples_ns": [ - 653895887710, - 653162192683, - 653625293434 - ], - "samples_ts": [ - 0.782999, - 0.783879, - 0.783323 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 703 - }, - { - "timestamp_utc": "2025-12-12T22:39:19.095759+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T22:24:17Z\",\n \"avg_ns\": 103702276000,\n \"stddev_ns\": 3113146215,\n \"avg_ts\": 1.492387,\n \"stddev_ts\": 0.755038,\n \"samples_ns\": [ 90202049124, 164799476253, 56105302625 ],\n \"samples_ts\": [ 1.41904, 0.776701, 2.28142 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T22:30:52Z\",\n \"avg_ns\": 168668601255,\n \"stddev_ns\": 4267847409,\n \"avg_ts\": 0.821607,\n \"stddev_ts\": 0.306034,\n \"samples_ns\": [ 193982103488, 108975562384, 203048137893 ],\n \"samples_ts\": [ 0.659855, 1.17458, 0.630392 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T22:24:17Z", - "avg_ns": 103702276000, - "stddev_ns": 3113146215, - "avg_ts": 1.492387, - "stddev_ts": 0.755038, - "samples_ns": [ - 90202049124, - 164799476253, - 56105302625 - ], - "samples_ts": [ - 1.41904, - 0.776701, - 2.28142 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-12T22:30:52Z", - "avg_ns": 168668601255, - "stddev_ns": 4267847409, - "avg_ts": 0.821607, - "stddev_ts": 0.306034, - "samples_ns": [ - 193982103488, - 108975562384, - 203048137893 - ], - "samples_ts": [ - 0.659855, - 1.17458, - 0.630392 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 704 - }, - { - "timestamp_utc": "2025-12-12T23:19:40.341063+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T22:39:21Z\",\n \"avg_ns\": 103711286450,\n \"stddev_ns\": 2558955813,\n \"avg_ts\": 1.515705,\n \"stddev_ts\": 0.812855,\n \"samples_ns\": [ 92287298737, 165183281473, 53663279142 ],\n \"samples_ts\": [ 1.38697, 0.774897, 2.38524 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T22:45:53Z\",\n \"avg_ns\": 675409613274,\n \"stddev_ns\": 4071734341,\n \"avg_ts\": 0.759021,\n \"stddev_ts\": 0.033529,\n \"samples_ns\": [ 641835032795, 691081931579, 693311875449 ],\n \"samples_ts\": [ 0.797713, 0.740867, 0.738484 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T22:39:21Z", - "avg_ns": 103711286450, - "stddev_ns": 2558955813, - "avg_ts": 1.515705, - "stddev_ts": 0.812855, - "samples_ns": [ - 92287298737, - 165183281473, - 53663279142 - ], - "samples_ts": [ - 1.38697, - 0.774897, - 2.38524 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-12T22:45:53Z", - "avg_ns": 675409613274, - "stddev_ns": 4071734341, - "avg_ts": 0.759021, - "stddev_ts": 0.033529, - "samples_ns": [ - 641835032795, - 691081931579, - 693311875449 - ], - "samples_ts": [ - 0.797713, - 0.740867, - 0.738484 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 705 - }, - { - "timestamp_utc": "2025-12-12T23:55:49.023372+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T23:19:42Z\",\n \"avg_ns\": 446092205874,\n \"stddev_ns\": 4164271949,\n \"avg_ts\": 1.152923,\n \"stddev_ts\": 0.095531,\n \"samples_ns\": [ 479918134866, 407738823613, 450619659143 ],\n \"samples_ts\": [ 1.06685, 1.25571, 1.13621 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T23:48:19Z\",\n \"avg_ns\": 149178651796,\n \"stddev_ns\": 2641029653,\n \"avg_ts\": 0.868486,\n \"stddev_ts\": 0.114092,\n \"samples_ns\": [ 143351070882, 172006162515, 132178721992 ],\n \"samples_ts\": [ 0.892913, 0.744159, 0.968386 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T23:19:42Z", - "avg_ns": 446092205874, - "stddev_ns": 4164271949, - "avg_ts": 1.152923, - "stddev_ts": 0.095531, - "samples_ns": [ - 479918134866, - 407738823613, - 450619659143 - ], - "samples_ts": [ - 1.06685, - 1.25571, - 1.13621 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-12T23:48:19Z", - "avg_ns": 149178651796, - "stddev_ns": 2641029653, - "avg_ts": 0.868486, - "stddev_ts": 0.114092, - "samples_ns": [ - 143351070882, - 172006162515, - 132178721992 - ], - "samples_ts": [ - 0.892913, - 0.744159, - 0.968386 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 706 - }, - { - "timestamp_utc": "2025-12-13T00:55:56.835003+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "1024", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T23:55:50Z\",\n \"avg_ns\": 440201316942,\n \"stddev_ns\": 1745127691,\n \"avg_ts\": 1.164383,\n \"stddev_ts\": 0.046764,\n \"samples_ns\": [ 432644636089, 427155985880, 460803328858 ],\n \"samples_ts\": [ 1.18342, 1.19863, 1.1111 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T00:25:26Z\",\n \"avg_ns\": 609777167990,\n \"stddev_ns\": 50513467,\n \"avg_ts\": 0.839651,\n \"stddev_ts\": 0.000070,\n \"samples_ns\": [ 609804417527, 609808205369, 609718881074 ],\n \"samples_ts\": [ 0.839613, 0.839608, 0.839731 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-12T23:55:50Z", - "avg_ns": 440201316942, - "stddev_ns": 1745127691, - "avg_ts": 1.164383, - "stddev_ts": 0.046764, - "samples_ns": [ - 432644636089, - 427155985880, - 460803328858 - ], - "samples_ts": [ - 1.18342, - 1.19863, - 1.1111 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 1024, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-13T00:25:26Z", - "avg_ns": 609777167990, - "stddev_ns": 50513467, - "avg_ts": 0.839651, - "stddev_ts": 7e-05, - "samples_ns": [ - 609804417527, - 609808205369, - 609718881074 - ], - "samples_ts": [ - 0.839613, - 0.839608, - 0.839731 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 1024, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 707 - }, - { - "timestamp_utc": "2025-12-13T01:10:54.722613+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T00:55:57Z\",\n \"avg_ns\": 112655842151,\n \"stddev_ns\": 3407710006,\n \"avg_ts\": 1.467220,\n \"stddev_ts\": 0.999554,\n \"samples_ns\": [ 132543613668, 48887860968, 156536051817 ],\n \"samples_ts\": [ 0.96572, 2.61824, 0.817703 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T01:03:46Z\",\n \"avg_ns\": 142098671309,\n \"stddev_ns\": 4214984565,\n \"avg_ts\": 0.946484,\n \"stddev_ts\": 0.237667,\n \"samples_ns\": [ 123179722783, 189224463063, 113891828083 ],\n \"samples_ts\": [ 1.03913, 0.676445, 1.12387 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-13T00:55:57Z", - "avg_ns": 112655842151, - "stddev_ns": 3407710006, - "avg_ts": 1.46722, - "stddev_ts": 0.999554, - "samples_ns": [ - 132543613668, - 48887860968, - 156536051817 - ], - "samples_ts": [ - 0.96572, - 2.61824, - 0.817703 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-13T01:03:46Z", - "avg_ns": 142098671309, - "stddev_ns": 4214984565, - "avg_ts": 0.946484, - "stddev_ts": 0.237667, - "samples_ns": [ - 123179722783, - 189224463063, - 113891828083 - ], - "samples_ts": [ - 1.03913, - 0.676445, - 1.12387 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 708 - }, - { - "timestamp_utc": "2025-12-13T01:50:58.193779+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T01:10:55Z\",\n \"avg_ns\": 112593300231,\n \"stddev_ns\": 1173111266,\n \"avg_ts\": 1.472165,\n \"stddev_ts\": 1.014676,\n \"samples_ns\": [ 137597103017, 48434549659, 151748248018 ],\n \"samples_ts\": [ 0.930252, 2.64274, 0.843502 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T01:18:38Z\",\n \"avg_ns\": 645651884581,\n \"stddev_ns\": 1585734823,\n \"avg_ts\": 0.794713,\n \"stddev_ts\": 0.044491,\n \"samples_ns\": [ 688783367749, 624096226096, 624076059899 ],\n \"samples_ts\": [ 0.74334, 0.820386, 0.820413 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-13T01:10:55Z", - "avg_ns": 112593300231, - "stddev_ns": 1173111266, - "avg_ts": 1.472165, - "stddev_ts": 1.014676, - "samples_ns": [ - 137597103017, - 48434549659, - 151748248018 - ], - "samples_ts": [ - 0.930252, - 2.64274, - 0.843502 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-13T01:18:38Z", - "avg_ns": 645651884581, - "stddev_ns": 1585734823, - "avg_ts": 0.794713, - "stddev_ts": 0.044491, - "samples_ns": [ - 688783367749, - 624096226096, - 624076059899 - ], - "samples_ts": [ - 0.74334, - 0.820386, - 0.820413 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 709 - }, - { - "timestamp_utc": "2025-12-13T02:28:02.321099+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T01:50:59Z\",\n \"avg_ns\": 433107466396,\n \"stddev_ns\": 1180695900,\n \"avg_ts\": 1.183506,\n \"stddev_ts\": 0.049465,\n \"samples_ns\": [ 413048414362, 439501814943, 446772169884 ],\n \"samples_ts\": [ 1.23956, 1.16496, 1.146 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T02:20:31Z\",\n \"avg_ns\": 149898500415,\n \"stddev_ns\": 2434813739,\n \"avg_ts\": 0.860653,\n \"stddev_ts\": 0.091804,\n \"samples_ns\": [ 135778974958, 168088643086, 145827883201 ],\n \"samples_ts\": [ 0.942709, 0.761503, 0.877747 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-13T01:50:59Z", - "avg_ns": 433107466396, - "stddev_ns": 1180695900, - "avg_ts": 1.183506, - "stddev_ts": 0.049465, - "samples_ns": [ - 413048414362, - 439501814943, - 446772169884 - ], - "samples_ts": [ - 1.23956, - 1.16496, - 1.146 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-13T02:20:31Z", - "avg_ns": 149898500415, - "stddev_ns": 2434813739, - "avg_ts": 0.860653, - "stddev_ts": 0.091804, - "samples_ns": [ - 135778974958, - 168088643086, - 145827883201 - ], - "samples_ts": [ - 0.942709, - 0.761503, - 0.877747 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 710 - }, - { - "timestamp_utc": "2025-12-13T03:28:43.545260+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "128", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T02:28:04Z\",\n \"avg_ns\": 421892523663,\n \"stddev_ns\": 2366450119,\n \"avg_ts\": 1.225085,\n \"stddev_ts\": 0.142855,\n \"samples_ns\": [ 409072602089, 478135487650, 378469481252 ],\n \"samples_ts\": [ 1.25161, 1.07083, 1.35282 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T02:56:38Z\",\n \"avg_ns\": 641084880914,\n \"stddev_ns\": 1988864801,\n \"avg_ts\": 0.798651,\n \"stddev_ts\": 0.002480,\n \"samples_ns\": [ 641535805419, 638909269377, 642809567948 ],\n \"samples_ts\": [ 0.798085, 0.801366, 0.796503 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-13T02:28:04Z", - "avg_ns": 421892523663, - "stddev_ns": 2366450119, - "avg_ts": 1.225085, - "stddev_ts": 0.142855, - "samples_ns": [ - 409072602089, - 478135487650, - 378469481252 - ], - "samples_ts": [ - 1.25161, - 1.07083, - 1.35282 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 128, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-13T02:56:38Z", - "avg_ns": 641084880914, - "stddev_ns": 1988864801, - "avg_ts": 0.798651, - "stddev_ts": 0.00248, - "samples_ns": [ - 641535805419, - 638909269377, - 642809567948 - ], - "samples_ts": [ - 0.798085, - 0.801366, - 0.796503 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 128, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 711 - }, - { - "timestamp_utc": "2025-12-13T03:43:41.983607+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T03:28:45Z\",\n \"avg_ns\": 103755198637,\n \"stddev_ns\": 3778839432,\n \"avg_ts\": 1.432540,\n \"stddev_ts\": 0.585072,\n \"samples_ns\": [ 67441184333, 165003087847, 78821323732 ],\n \"samples_ts\": [ 1.89795, 0.775743, 1.62393 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T03:35:43Z\",\n \"avg_ns\": 159254363301,\n \"stddev_ns\": 3937350550,\n \"avg_ts\": 0.813152,\n \"stddev_ts\": 0.110770,\n \"samples_ns\": [ 166961289093, 136229040878, 174572759934 ],\n \"samples_ts\": [ 0.766645, 0.939594, 0.733219 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-13T03:28:45Z", - "avg_ns": 103755198637, - "stddev_ns": 3778839432, - "avg_ts": 1.43254, - "stddev_ts": 0.585072, - "samples_ns": [ - 67441184333, - 165003087847, - 78821323732 - ], - "samples_ts": [ - 1.89795, - 0.775743, - 1.62393 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-13T03:35:43Z", - "avg_ns": 159254363301, - "stddev_ns": 3937350550, - "avg_ts": 0.813152, - "stddev_ts": 0.11077, - "samples_ns": [ - 166961289093, - 136229040878, - 174572759934 - ], - "samples_ts": [ - 0.766645, - 0.939594, - 0.733219 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 712 - }, - { - "timestamp_utc": "2025-12-13T04:23:59.010109+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T03:43:43Z\",\n \"avg_ns\": 103649470155,\n \"stddev_ns\": 1357914149,\n \"avg_ts\": 1.438463,\n \"stddev_ts\": 0.600816,\n \"samples_ns\": [ 65615729081, 164701347474, 80631333910 ],\n \"samples_ts\": [ 1.95075, 0.777164, 1.58747 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T03:50:42Z\",\n \"avg_ns\": 665175871837,\n \"stddev_ns\": 4074038284,\n \"avg_ts\": 0.771335,\n \"stddev_ts\": 0.043757,\n \"samples_ns\": [ 623868969374, 677210066222, 694448579917 ],\n \"samples_ts\": [ 0.820685, 0.756043, 0.737276 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-13T03:43:43Z", - "avg_ns": 103649470155, - "stddev_ns": 1357914149, - "avg_ts": 1.438463, - "stddev_ts": 0.600816, - "samples_ns": [ - 65615729081, - 164701347474, - 80631333910 - ], - "samples_ts": [ - 1.95075, - 0.777164, - 1.58747 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-13T03:50:42Z", - "avg_ns": 665175871837, - "stddev_ns": 4074038284, - "avg_ts": 0.771335, - "stddev_ts": 0.043757, - "samples_ns": [ - 623868969374, - 677210066222, - 694448579917 - ], - "samples_ts": [ - 0.820685, - 0.756043, - 0.737276 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 713 - }, - { - "timestamp_utc": "2025-12-13T05:00:33.433170+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T04:24:01Z\",\n \"avg_ns\": 433009788211,\n \"stddev_ns\": 3675488178,\n \"avg_ts\": 1.183747,\n \"stddev_ts\": 0.049021,\n \"samples_ns\": [ 446161700331, 439810211459, 413057452845 ],\n \"samples_ts\": [ 1.14757, 1.16414, 1.23954 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T04:52:27Z\",\n \"avg_ns\": 161094175632,\n \"stddev_ns\": 2170589742,\n \"avg_ts\": 0.813458,\n \"stddev_ts\": 0.159862,\n \"samples_ns\": [ 180900680441, 128340431453, 174041415003 ],\n \"samples_ts\": [ 0.707571, 0.997347, 0.735457 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-13T04:24:01Z", - "avg_ns": 433009788211, - "stddev_ns": 3675488178, - "avg_ts": 1.183747, - "stddev_ts": 0.049021, - "samples_ns": [ - 446161700331, - 439810211459, - 413057452845 - ], - "samples_ts": [ - 1.14757, - 1.16414, - 1.23954 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-13T04:52:27Z", - "avg_ns": 161094175632, - "stddev_ns": 2170589742, - "avg_ts": 0.813458, - "stddev_ts": 0.159862, - "samples_ns": [ - 180900680441, - 128340431453, - 174041415003 - ], - "samples_ts": [ - 0.707571, - 0.997347, - 0.735457 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 714 - }, - { - "timestamp_utc": "2025-12-13T06:00:29.563376+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "256", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T05:00:34Z\",\n \"avg_ns\": 453441476435,\n \"stddev_ns\": 2002151419,\n \"avg_ts\": 1.140364,\n \"stddev_ts\": 0.142538,\n \"samples_ns\": [ 470901425011, 393293217845, 496129786449 ],\n \"samples_ts\": [ 1.08728, 1.30183, 1.03199 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T05:30:13Z\",\n \"avg_ns\": 605080687028,\n \"stddev_ns\": 298909773,\n \"avg_ts\": 0.846168,\n \"stddev_ts\": 0.000418,\n \"samples_ns\": [ 605365395628, 605107304923, 604769360535 ],\n \"samples_ts\": [ 0.84577, 0.846131, 0.846604 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-13T05:00:34Z", - "avg_ns": 453441476435, - "stddev_ns": 2002151419, - "avg_ts": 1.140364, - "stddev_ts": 0.142538, - "samples_ns": [ - 470901425011, - 393293217845, - 496129786449 - ], - "samples_ts": [ - 1.08728, - 1.30183, - 1.03199 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 256, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-13T05:30:13Z", - "avg_ns": 605080687028, - "stddev_ns": 298909773, - "avg_ts": 0.846168, - "stddev_ts": 0.000418, - "samples_ns": [ - 605365395628, - 605107304923, - 604769360535 - ], - "samples_ts": [ - 0.84577, - 0.846131, - 0.846604 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 256, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 715 - }, - { - "timestamp_utc": "2025-12-13T06:15:30.836306+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T06:00:30Z\",\n \"avg_ns\": 112827334977,\n \"stddev_ns\": 582596382,\n \"avg_ts\": 1.246088,\n \"stddev_ts\": 0.415146,\n \"samples_ns\": [ 165317924302, 82295430569, 90868650060 ],\n \"samples_ts\": [ 0.774266, 1.55537, 1.40863 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T06:07:13Z\",\n \"avg_ns\": 165207145060,\n \"stddev_ns\": 3880623910,\n \"avg_ts\": 0.810693,\n \"stddev_ts\": 0.224508,\n \"samples_ns\": [ 192612871314, 119715860422, 183292703446 ],\n \"samples_ts\": [ 0.664545, 1.0692, 0.698337 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-13T06:00:30Z", - "avg_ns": 112827334977, - "stddev_ns": 582596382, - "avg_ts": 1.246088, - "stddev_ts": 0.415146, - "samples_ns": [ - 165317924302, - 82295430569, - 90868650060 - ], - "samples_ts": [ - 0.774266, - 1.55537, - 1.40863 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-13T06:07:13Z", - "avg_ns": 165207145060, - "stddev_ns": 3880623910, - "avg_ts": 0.810693, - "stddev_ts": 0.224508, - "samples_ns": [ - 192612871314, - 119715860422, - 183292703446 - ], - "samples_ts": [ - 0.664545, - 1.0692, - 0.698337 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 716 - }, - { - "timestamp_utc": "2025-12-13T06:55:40.200098+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "128", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T06:15:31Z\",\n \"avg_ns\": 118592120931,\n \"stddev_ns\": 4082249926,\n \"avg_ts\": 1.271682,\n \"stddev_ts\": 0.605264,\n \"samples_ns\": [ 182683119772, 67149703336, 105943539687 ],\n \"samples_ts\": [ 0.700667, 1.90619, 1.20819 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T06:22:34Z\",\n \"avg_ns\": 661106226756,\n \"stddev_ns\": 1262708336,\n \"avg_ts\": 0.775817,\n \"stddev_ts\": 0.040120,\n \"samples_ns\": [ 689888632797, 669198921242, 624231126230 ],\n \"samples_ts\": [ 0.742149, 0.765094, 0.820209 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 128, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-13T06:15:31Z", - "avg_ns": 118592120931, - "stddev_ns": 4082249926, - "avg_ts": 1.271682, - "stddev_ts": 0.605264, - "samples_ns": [ - 182683119772, - 67149703336, - 105943539687 - ], - "samples_ts": [ - 0.700667, - 1.90619, - 1.20819 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-13T06:22:34Z", - "avg_ns": 661106226756, - "stddev_ns": 1262708336, - "avg_ts": 0.775817, - "stddev_ts": 0.04012, - "samples_ns": [ - 689888632797, - 669198921242, - 624231126230 - ], - "samples_ts": [ - 0.742149, - 0.765094, - 0.820209 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 128, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 717 - }, - { - "timestamp_utc": "2025-12-13T07:32:12.125925+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "128", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T06:55:41Z\",\n \"avg_ns\": 448060419825,\n \"stddev_ns\": 558266600,\n \"avg_ts\": 1.148811,\n \"stddev_ts\": 0.104071,\n \"samples_ns\": [ 455610121388, 405345704636, 483225433452 ],\n \"samples_ts\": [ 1.12377, 1.26312, 1.05955 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T07:25:18Z\",\n \"avg_ns\": 137570068479,\n \"stddev_ns\": 487337883,\n \"avg_ts\": 1.040578,\n \"stddev_ts\": 0.374631,\n \"samples_ns\": [ 96490388209, 207623556537, 108596260692 ],\n \"samples_ts\": [ 1.32656, 0.6165, 1.17868 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-13T06:55:41Z", - "avg_ns": 448060419825, - "stddev_ns": 558266600, - "avg_ts": 1.148811, - "stddev_ts": 0.104071, - "samples_ns": [ - 455610121388, - 405345704636, - 483225433452 - ], - "samples_ts": [ - 1.12377, - 1.26312, - 1.05955 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 128, - "n_depth": 0, - "test_time": "2025-12-13T07:25:18Z", - "avg_ns": 137570068479, - "stddev_ns": 487337883, - "avg_ts": 1.040578, - "stddev_ts": 0.374631, - "samples_ns": [ - 96490388209, - 207623556537, - 108596260692 - ], - "samples_ts": [ - 1.32656, - 0.6165, - 1.17868 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 128, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 718 - }, - { - "timestamp_utc": "2025-12-13T08:32:48.786193+00:00", - "command": [ - "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", - "--model", - "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "--threads", - "2", - "--batch-size", - "2048", - "--ubatch-size", - "512", - "--n-prompt", - "512", - "--n-gen", - "512", - "--repetitions", - "3", - "--output", - "json" - ], - "returncode": 0, - "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T07:32:14Z\",\n \"avg_ns\": 416491989808,\n \"stddev_ns\": 4067130096,\n \"avg_ts\": 1.256586,\n \"stddev_ts\": 0.215532,\n \"samples_ns\": [ 375994343626, 507603331438, 365878294362 ],\n \"samples_ts\": [ 1.36172, 1.00866, 1.39937 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T08:01:07Z\",\n \"avg_ns\": 633429283343,\n \"stddev_ns\": 994638012,\n \"avg_ts\": 0.808425,\n \"stddev_ts\": 0.012430,\n \"samples_ns\": [ 622300307727, 638417185123, 639570357179 ],\n \"samples_ts\": [ 0.822754, 0.801983, 0.800537 ]\n }\n]\n", - "stderr": "", - "parsed": [ - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 512, - "n_gen": 0, - "n_depth": 0, - "test_time": "2025-12-13T07:32:14Z", - "avg_ns": 416491989808, - "stddev_ns": 4067130096, - "avg_ts": 1.256586, - "stddev_ts": 0.215532, - "samples_ns": [ - 375994343626, - 507603331438, - 365878294362 - ], - "samples_ts": [ - 1.36172, - 1.00866, - 1.39937 - ] - }, - { - "build_commit": "2fa51c19b", - "build_number": 7326, - "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", - "gpu_info": "", - "backends": "CPU", - "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_type": "gemma3 12B Q2_K - Medium", - "model_size": 4761669504, - "model_n_params": 11766034176, - "n_batch": 2048, - "n_ubatch": 512, - "n_threads": 2, - "cpu_mask": "0x0", - "cpu_strict": false, - "poll": 50, - "type_k": "f16", - "type_v": "f16", - "n_gpu_layers": 99, - "n_cpu_moe": 0, - "split_mode": "layer", - "main_gpu": 0, - "no_kv_offload": false, - "flash_attn": false, - "devices": "auto", - "tensor_split": "0.00", - "tensor_buft_overrides": "none", - "use_mmap": true, - "embeddings": false, - "no_op_offload": 0, - "no_host": false, - "n_prompt": 0, - "n_gen": 512, - "n_depth": 0, - "test_time": "2025-12-13T08:01:07Z", - "avg_ns": 633429283343, - "stddev_ns": 994638012, - "avg_ts": 0.808425, - "stddev_ts": 0.01243, - "samples_ns": [ - 622300307727, - 638417185123, - 639570357179 - ], - "samples_ts": [ - 0.822754, - 0.801983, - 0.800537 - ] - } - ], - "params": { - "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", - "model_repo": "unsloth/gemma-3-12B-it-GGUF", - "quantization": "Q2_K_L", - "threads": 2, - "batch_size": 2048, - "ubatch_size": 512, - "n_prompt": 512, - "n_gen": 512, - "repetitions": 3, - "numa": null, - "priority": 0, - "progress": false - }, - "run_index": 719 - } - ], - "plots": [ - "/home/ubuntu/sunkiss/inference/result/throughput_vs_threads.png", - "/home/ubuntu/sunkiss/inference/result/throughput_vs_batch.png", - "/home/ubuntu/sunkiss/inference/result/latency_vs_threads.png" - ] -}