{ "timestamp_utc": "2025-12-08T20:00:25.887398+00:00", "bench_binary": "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "system": { "hostname": "ip-172-31-82-82", "platform": "Linux-6.14.0-1015-aws-x86_64-with-glibc2.39", "python": "3.12.3", "cpu_count": 2, "cpu_info": { "lscpu": [ { "field": "Architecture:", "data": "x86_64" }, { "field": "CPU op-mode(s):", "data": "32-bit, 64-bit" }, { "field": "Address sizes:", "data": "46 bits physical, 48 bits virtual" }, { "field": "Byte Order:", "data": "Little Endian" }, { "field": "CPU(s):", "data": "2" }, { "field": "On-line CPU(s) list:", "data": "0,1" }, { "field": "Vendor ID:", "data": "GenuineIntel" }, { "field": "Model name:", "data": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz" }, { "field": "CPU family:", "data": "6" }, { "field": "Model:", "data": "79" }, { "field": "Thread(s) per core:", "data": "1" }, { "field": "Core(s) per socket:", "data": "2" }, { "field": "Socket(s):", "data": "1" }, { "field": "Stepping:", "data": "1" }, { "field": "BogoMIPS:", "data": "4600.03" }, { "field": "Flags:", "data": "fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx rdtscp lm constant_tsc rep_good nopl xtopology cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm cpuid_fault pti fsgsbase bmi1 avx2 smep bmi2 erms invpcid xsaveopt" }, { "field": "Hypervisor vendor:", "data": "Xen" }, { "field": "Virtualization type:", "data": "full" }, { "field": "L1d cache:", "data": "64 KiB (2 instances)" }, { "field": "L1i cache:", "data": "64 KiB (2 instances)" }, { "field": "L2 cache:", "data": "512 KiB (2 instances)" }, { "field": "L3 cache:", "data": "45 MiB (1 instance)" }, { "field": "NUMA node(s):", "data": "1" }, { "field": "NUMA node0 CPU(s):", "data": "0,1" }, { "field": "Vulnerability Gather data sampling:", "data": "Not affected" }, { "field": "Vulnerability Ghostwrite:", "data": "Not affected" }, { "field": "Vulnerability Indirect target selection:", "data": "Mitigation; Aligned branch/return thunks" }, { "field": "Vulnerability Itlb multihit:", "data": "KVM: Mitigation: VMX unsupported" }, { "field": "Vulnerability L1tf:", "data": "Mitigation; PTE Inversion" }, { "field": "Vulnerability Mds:", "data": "Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown" }, { "field": "Vulnerability Meltdown:", "data": "Mitigation; PTI" }, { "field": "Vulnerability Mmio stale data:", "data": "Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown" }, { "field": "Vulnerability Reg file data sampling:", "data": "Not affected" }, { "field": "Vulnerability Retbleed:", "data": "Not affected" }, { "field": "Vulnerability Spec rstack overflow:", "data": "Not affected" }, { "field": "Vulnerability Spec store bypass:", "data": "Vulnerable" }, { "field": "Vulnerability Spectre v1:", "data": "Mitigation; usercopy/swapgs barriers and __user pointer sanitization" }, { "field": "Vulnerability Spectre v2:", "data": "Mitigation; Retpolines; STIBP disabled; RSB filling; PBRSB-eIBRS Not affected; BHI Retpoline" }, { "field": "Vulnerability Srbds:", "data": "Not affected" }, { "field": "Vulnerability Tsa:", "data": "Not affected" }, { "field": "Vulnerability Tsx async abort:", "data": "Not affected" } ] }, "total_ram_bytes": 8323702784 }, "runs": [ { "timestamp_utc": "2025-12-08T20:03:33.571483+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:03:13Z\",\n \"avg_ns\": 1106500666,\n \"stddev_ns\": 4620157,\n \"avg_ts\": 115.681346,\n \"stddev_ts\": 0.482714,\n \"samples_ns\": [ 1111326820, 1102119132, 1106056048 ],\n \"samples_ts\": [ 115.178, 116.14, 115.727 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:03:18Z\",\n \"avg_ns\": 5133914572,\n \"stddev_ns\": 330441335,\n \"avg_ts\": 24.998817,\n \"stddev_ts\": 1.551529,\n \"samples_ns\": [ 4935630764, 5515375657, 4950737296 ],\n \"samples_ts\": [ 25.9339, 23.2078, 25.8547 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:03:13Z", "avg_ns": 1106500666, "stddev_ns": 4620157, "avg_ts": 115.681346, "stddev_ts": 0.482714, "samples_ns": [ 1111326820, 1102119132, 1106056048 ], "samples_ts": [ 115.178, 116.14, 115.727 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:03:18Z", "avg_ns": 5133914572, "stddev_ns": 330441335, "avg_ts": 24.998817, "stddev_ts": 1.551529, "samples_ns": [ 4935630764, 5515375657, 4950737296 ], "samples_ts": [ 25.9339, 23.2078, 25.8547 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 0 }, { "timestamp_utc": "2025-12-08T20:04:40.957262+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:03:34Z\",\n \"avg_ns\": 1110582042,\n \"stddev_ns\": 2401701,\n \"avg_ts\": 115.255239,\n \"stddev_ts\": 0.248976,\n \"samples_ns\": [ 1113314164, 1109628053, 1108803909 ],\n \"samples_ts\": [ 114.972, 115.354, 115.44 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:03:38Z\",\n \"avg_ns\": 20757011766,\n \"stddev_ns\": 362169642,\n \"avg_ts\": 24.671421,\n \"stddev_ts\": 0.434801,\n \"samples_ns\": [ 20339338168, 20983970456, 20947726675 ],\n \"samples_ts\": [ 25.1729, 24.3996, 24.4418 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:03:34Z", "avg_ns": 1110582042, "stddev_ns": 2401701, "avg_ts": 115.255239, "stddev_ts": 0.248976, "samples_ns": [ 1113314164, 1109628053, 1108803909 ], "samples_ts": [ 114.972, 115.354, 115.44 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:03:38Z", "avg_ns": 20757011766, "stddev_ns": 362169642, "avg_ts": 24.671421, "stddev_ts": 0.434801, "samples_ns": [ 20339338168, 20983970456, 20947726675 ], "samples_ts": [ 25.1729, 24.3996, 24.4418 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 1 }, { "timestamp_utc": "2025-12-08T20:05:16.724425+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:04:41Z\",\n \"avg_ns\": 4621011173,\n \"stddev_ns\": 327537615,\n \"avg_ts\": 111.155929,\n \"stddev_ts\": 7.569175,\n \"samples_ns\": [ 4435464928, 4428371729, 4999196862 ],\n \"samples_ts\": [ 115.433, 115.618, 102.416 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:05:00Z\",\n \"avg_ns\": 5417089273,\n \"stddev_ns\": 27683136,\n \"avg_ts\": 23.629337,\n \"stddev_ts\": 0.120778,\n \"samples_ns\": [ 5444432323, 5417757352, 5389078144 ],\n \"samples_ts\": [ 23.5103, 23.626, 23.7517 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:04:41Z", "avg_ns": 4621011173, "stddev_ns": 327537615, "avg_ts": 111.155929, "stddev_ts": 7.569175, "samples_ns": [ 4435464928, 4428371729, 4999196862 ], "samples_ts": [ 115.433, 115.618, 102.416 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:05:00Z", "avg_ns": 5417089273, "stddev_ns": 27683136, "avg_ts": 23.629337, "stddev_ts": 0.120778, "samples_ns": [ 5444432323, 5417757352, 5389078144 ], "samples_ts": [ 23.5103, 23.626, 23.7517 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 2 }, { "timestamp_utc": "2025-12-08T20:06:37.135732+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:05:17Z\",\n \"avg_ns\": 4625402712,\n \"stddev_ns\": 313844497,\n \"avg_ts\": 111.020977,\n \"stddev_ts\": 7.249103,\n \"samples_ns\": [ 4446029219, 4987793001, 4442385918 ],\n \"samples_ts\": [ 115.159, 102.651, 115.253 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:05:36Z\",\n \"avg_ns\": 20294551729,\n \"stddev_ns\": 81452388,\n \"avg_ts\": 25.228716,\n \"stddev_ts\": 0.101102,\n \"samples_ns\": [ 20272299181, 20384817558, 20226538450 ],\n \"samples_ts\": [ 25.2561, 25.1167, 25.3133 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:05:17Z", "avg_ns": 4625402712, "stddev_ns": 313844497, "avg_ts": 111.020977, "stddev_ts": 7.249103, "samples_ns": [ 4446029219, 4987793001, 4442385918 ], "samples_ts": [ 115.159, 102.651, 115.253 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:05:36Z", "avg_ns": 20294551729, "stddev_ns": 81452388, "avg_ts": 25.228716, "stddev_ts": 0.101102, "samples_ns": [ 20272299181, 20384817558, 20226538450 ], "samples_ts": [ 25.2561, 25.1167, 25.3133 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 3 }, { "timestamp_utc": "2025-12-08T20:06:57.110539+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:06:37Z\",\n \"avg_ns\": 1102196216,\n \"stddev_ns\": 2757588,\n \"avg_ts\": 116.132256,\n \"stddev_ts\": 0.290238,\n \"samples_ns\": [ 1101452244, 1099887351, 1105249055 ],\n \"samples_ts\": [ 116.21, 116.376, 115.811 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:06:42Z\",\n \"avg_ns\": 4960849973,\n \"stddev_ns\": 32319276,\n \"avg_ts\": 25.802763,\n \"stddev_ts\": 0.168670,\n \"samples_ns\": [ 4923975529, 4984261347, 4974313043 ],\n \"samples_ts\": [ 25.9953, 25.6808, 25.7322 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:06:37Z", "avg_ns": 1102196216, "stddev_ns": 2757588, "avg_ts": 116.132256, "stddev_ts": 0.290238, "samples_ns": [ 1101452244, 1099887351, 1105249055 ], "samples_ts": [ 116.21, 116.376, 115.811 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:06:42Z", "avg_ns": 4960849973, "stddev_ns": 32319276, "avg_ts": 25.802763, "stddev_ts": 0.16867, "samples_ns": [ 4923975529, 4984261347, 4974313043 ], "samples_ts": [ 25.9953, 25.6808, 25.7322 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 4 }, { "timestamp_utc": "2025-12-08T20:08:03.116774+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:06:57Z\",\n \"avg_ns\": 1106049596,\n \"stddev_ns\": 2007084,\n \"avg_ts\": 115.727434,\n \"stddev_ts\": 0.210126,\n \"samples_ns\": [ 1106624293, 1103818135, 1107706361 ],\n \"samples_ts\": [ 115.667, 115.961, 115.554 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:07:02Z\",\n \"avg_ns\": 20297889904,\n \"stddev_ns\": 76909551,\n \"avg_ts\": 25.224539,\n \"stddev_ts\": 0.095694,\n \"samples_ns\": [ 20364590505, 20213762534, 20315316675 ],\n \"samples_ts\": [ 25.1417, 25.3293, 25.2027 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:06:57Z", "avg_ns": 1106049596, "stddev_ns": 2007084, "avg_ts": 115.727434, "stddev_ts": 0.210126, "samples_ns": [ 1106624293, 1103818135, 1107706361 ], "samples_ts": [ 115.667, 115.961, 115.554 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:07:02Z", "avg_ns": 20297889904, "stddev_ns": 76909551, "avg_ts": 25.224539, "stddev_ts": 0.095694, "samples_ns": [ 20364590505, 20213762534, 20315316675 ], "samples_ts": [ 25.1417, 25.3293, 25.2027 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 5 }, { "timestamp_utc": "2025-12-08T20:08:36.623513+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:08:03Z\",\n \"avg_ns\": 4476377813,\n \"stddev_ns\": 2314148,\n \"avg_ts\": 114.378212,\n \"stddev_ts\": 0.059063,\n \"samples_ns\": [ 4474864022, 4475230069, 4479039350 ],\n \"samples_ts\": [ 114.417, 114.408, 114.31 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:08:21Z\",\n \"avg_ns\": 4959751842,\n \"stddev_ns\": 28627467,\n \"avg_ts\": 25.808316,\n \"stddev_ts\": 0.149088,\n \"samples_ns\": [ 4929853106, 4962491743, 4986910679 ],\n \"samples_ts\": [ 25.9643, 25.7935, 25.6672 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:08:03Z", "avg_ns": 4476377813, "stddev_ns": 2314148, "avg_ts": 114.378212, "stddev_ts": 0.059063, "samples_ns": [ 4474864022, 4475230069, 4479039350 ], "samples_ts": [ 114.417, 114.408, 114.31 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:08:21Z", "avg_ns": 4959751842, "stddev_ns": 28627467, "avg_ts": 25.808316, "stddev_ts": 0.149088, "samples_ns": [ 4929853106, 4962491743, 4986910679 ], "samples_ts": [ 25.9643, 25.7935, 25.6672 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 6 }, { "timestamp_utc": "2025-12-08T20:09:57.999516+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:08:37Z\",\n \"avg_ns\": 4479210778,\n \"stddev_ns\": 7260134,\n \"avg_ts\": 114.306051,\n \"stddev_ts\": 0.185186,\n \"samples_ns\": [ 4486988522, 4478029420, 4472614394 ],\n \"samples_ts\": [ 114.108, 114.336, 114.474 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:08:55Z\",\n \"avg_ns\": 20918691635,\n \"stddev_ns\": 251863715,\n \"avg_ts\": 24.478068,\n \"stddev_ts\": 0.292705,\n \"samples_ns\": [ 21209201523, 20761675114, 20785198268 ],\n \"samples_ts\": [ 24.1405, 24.6608, 24.6329 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:08:37Z", "avg_ns": 4479210778, "stddev_ns": 7260134, "avg_ts": 114.306051, "stddev_ts": 0.185186, "samples_ns": [ 4486988522, 4478029420, 4472614394 ], "samples_ts": [ 114.108, 114.336, 114.474 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:08:55Z", "avg_ns": 20918691635, "stddev_ns": 251863715, "avg_ts": 24.478068, "stddev_ts": 0.292705, "samples_ns": [ 21209201523, 20761675114, 20785198268 ], "samples_ts": [ 24.1405, 24.6608, 24.6329 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 7 }, { "timestamp_utc": "2025-12-08T20:10:19.213721+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:09:58Z\",\n \"avg_ns\": 1105184118,\n \"stddev_ns\": 3629888,\n \"avg_ts\": 115.818638,\n \"stddev_ts\": 0.379925,\n \"samples_ns\": [ 1102125251, 1109194980, 1104232124 ],\n \"samples_ts\": [ 116.139, 115.399, 115.918 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:10:02Z\",\n \"avg_ns\": 5374942504,\n \"stddev_ns\": 345628570,\n \"avg_ts\": 23.882570,\n \"stddev_ts\": 1.594676,\n \"samples_ns\": [ 5585886941, 4976066235, 5562874337 ],\n \"samples_ts\": [ 22.9149, 25.7231, 23.0097 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:09:58Z", "avg_ns": 1105184118, "stddev_ns": 3629888, "avg_ts": 115.818638, "stddev_ts": 0.379925, "samples_ns": [ 1102125251, 1109194980, 1104232124 ], "samples_ts": [ 116.139, 115.399, 115.918 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:10:02Z", "avg_ns": 5374942504, "stddev_ns": 345628570, "avg_ts": 23.88257, "stddev_ts": 1.594676, "samples_ns": [ 5585886941, 4976066235, 5562874337 ], "samples_ts": [ 22.9149, 25.7231, 23.0097 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 8 }, { "timestamp_utc": "2025-12-08T20:11:26.215949+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:10:19Z\",\n \"avg_ns\": 1102747557,\n \"stddev_ns\": 2343874,\n \"avg_ts\": 116.074060,\n \"stddev_ts\": 0.246939,\n \"samples_ns\": [ 1103746033, 1104426353, 1100070287 ],\n \"samples_ts\": [ 115.969, 115.897, 116.356 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:10:24Z\",\n \"avg_ns\": 20639936662,\n \"stddev_ns\": 351422511,\n \"avg_ts\": 24.811120,\n \"stddev_ts\": 0.426624,\n \"samples_ns\": [ 20833807997, 20234280749, 20851721241 ],\n \"samples_ts\": [ 24.5754, 25.3036, 24.5543 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:10:19Z", "avg_ns": 1102747557, "stddev_ns": 2343874, "avg_ts": 116.07406, "stddev_ts": 0.246939, "samples_ns": [ 1103746033, 1104426353, 1100070287 ], "samples_ts": [ 115.969, 115.897, 116.356 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:10:24Z", "avg_ns": 20639936662, "stddev_ns": 351422511, "avg_ts": 24.81112, "stddev_ts": 0.426624, "samples_ns": [ 20833807997, 20234280749, 20851721241 ], "samples_ts": [ 24.5754, 25.3036, 24.5543 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 9 }, { "timestamp_utc": "2025-12-08T20:12:01.164077+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:11:26Z\",\n \"avg_ns\": 4690328629,\n \"stddev_ns\": 2167827,\n \"avg_ts\": 109.160810,\n \"stddev_ts\": 0.050392,\n \"samples_ns\": [ 4689609708, 4692762327, 4688613854 ],\n \"samples_ts\": [ 109.178, 109.104, 109.201 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:11:45Z\",\n \"avg_ns\": 5160083525,\n \"stddev_ns\": 364142446,\n \"avg_ts\": 24.885285,\n \"stddev_ts\": 1.689607,\n \"samples_ns\": [ 5579161187, 4920877437, 4980211953 ],\n \"samples_ts\": [ 22.9425, 26.0116, 25.7017 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:11:26Z", "avg_ns": 4690328629, "stddev_ns": 2167827, "avg_ts": 109.16081, "stddev_ts": 0.050392, "samples_ns": [ 4689609708, 4692762327, 4688613854 ], "samples_ts": [ 109.178, 109.104, 109.201 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:11:45Z", "avg_ns": 5160083525, "stddev_ns": 364142446, "avg_ts": 24.885285, "stddev_ts": 1.689607, "samples_ns": [ 5579161187, 4920877437, 4980211953 ], "samples_ts": [ 22.9425, 26.0116, 25.7017 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 10 }, { "timestamp_utc": "2025-12-08T20:13:22.355728+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:12:01Z\",\n \"avg_ns\": 4794397539,\n \"stddev_ns\": 8287062,\n \"avg_ts\": 106.791524,\n \"stddev_ts\": 0.184739,\n \"samples_ns\": [ 4784947905, 4800423939, 4797820775 ],\n \"samples_ts\": [ 107.002, 106.657, 106.715 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:12:20Z\",\n \"avg_ns\": 20425213054,\n \"stddev_ns\": 349850401,\n \"avg_ts\": 25.071914,\n \"stddev_ts\": 0.425241,\n \"samples_ns\": [ 20829123924, 20229364171, 20217151068 ],\n \"samples_ts\": [ 24.581, 25.3097, 25.325 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:12:01Z", "avg_ns": 4794397539, "stddev_ns": 8287062, "avg_ts": 106.791524, "stddev_ts": 0.184739, "samples_ns": [ 4784947905, 4800423939, 4797820775 ], "samples_ts": [ 107.002, 106.657, 106.715 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:12:20Z", "avg_ns": 20425213054, "stddev_ns": 349850401, "avg_ts": 25.071914, "stddev_ts": 0.425241, "samples_ns": [ 20829123924, 20229364171, 20217151068 ], "samples_ts": [ 24.581, 25.3097, 25.325 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 11 }, { "timestamp_utc": "2025-12-08T20:13:42.876553+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:13:22Z\",\n \"avg_ns\": 1292471578,\n \"stddev_ns\": 315915571,\n \"avg_ts\": 102.616856,\n \"stddev_ts\": 21.980772,\n \"samples_ns\": [ 1108853253, 1111304768, 1657256713 ],\n \"samples_ts\": [ 115.435, 115.18, 77.2361 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:13:27Z\",\n \"avg_ns\": 4952994205,\n \"stddev_ns\": 30181803,\n \"avg_ts\": 25.843592,\n \"stddev_ts\": 0.157039,\n \"samples_ns\": [ 4942332879, 4929589988, 4987059748 ],\n \"samples_ts\": [ 25.8987, 25.9656, 25.6664 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:13:22Z", "avg_ns": 1292471578, "stddev_ns": 315915571, "avg_ts": 102.616856, "stddev_ts": 21.980772, "samples_ns": [ 1108853253, 1111304768, 1657256713 ], "samples_ts": [ 115.435, 115.18, 77.2361 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:13:27Z", "avg_ns": 4952994205, "stddev_ns": 30181803, "avg_ts": 25.843592, "stddev_ts": 0.157039, "samples_ns": [ 4942332879, 4929589988, 4987059748 ], "samples_ts": [ 25.8987, 25.9656, 25.6664 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 12 }, { "timestamp_utc": "2025-12-08T20:14:56.653716+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:13:43Z\",\n \"avg_ns\": 1102966892,\n \"stddev_ns\": 652592,\n \"avg_ts\": 116.050654,\n \"stddev_ts\": 0.068505,\n \"samples_ns\": [ 1103461701, 1103209432, 1102229545 ],\n \"samples_ts\": [ 115.999, 116.025, 116.128 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:13:47Z\",\n \"avg_ns\": 22896565701,\n \"stddev_ns\": 650794940,\n \"avg_ts\": 22.373291,\n \"stddev_ts\": 0.625852,\n \"samples_ns\": [ 22479687094, 22563531850, 23646478161 ],\n \"samples_ts\": [ 22.7761, 22.6915, 21.6523 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:13:43Z", "avg_ns": 1102966892, "stddev_ns": 652592, "avg_ts": 116.050654, "stddev_ts": 0.068505, "samples_ns": [ 1103461701, 1103209432, 1102229545 ], "samples_ts": [ 115.999, 116.025, 116.128 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:13:47Z", "avg_ns": 22896565701, "stddev_ns": 650794940, "avg_ts": 22.373291, "stddev_ts": 0.625852, "samples_ns": [ 22479687094, 22563531850, 23646478161 ], "samples_ts": [ 22.7761, 22.6915, 21.6523 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 13 }, { "timestamp_utc": "2025-12-08T20:15:30.117658+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:14:57Z\",\n \"avg_ns\": 4459336183,\n \"stddev_ns\": 8598832,\n \"avg_ts\": 114.815580,\n \"stddev_ts\": 0.221483,\n \"samples_ns\": [ 4460621452, 4450167323, 4467219775 ],\n \"samples_ts\": [ 114.782, 115.052, 114.613 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:15:15Z\",\n \"avg_ns\": 4977866472,\n \"stddev_ns\": 31717602,\n \"avg_ts\": 25.714525,\n \"stddev_ts\": 0.164279,\n \"samples_ns\": [ 5003674782, 4987466865, 4942457769 ],\n \"samples_ts\": [ 25.5812, 25.6643, 25.898 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:14:57Z", "avg_ns": 4459336183, "stddev_ns": 8598832, "avg_ts": 114.81558, "stddev_ts": 0.221483, "samples_ns": [ 4460621452, 4450167323, 4467219775 ], "samples_ts": [ 114.782, 115.052, 114.613 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:15:15Z", "avg_ns": 4977866472, "stddev_ns": 31717602, "avg_ts": 25.714525, "stddev_ts": 0.164279, "samples_ns": [ 5003674782, 4987466865, 4942457769 ], "samples_ts": [ 25.5812, 25.6643, 25.898 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 14 }, { "timestamp_utc": "2025-12-08T20:16:49.919883+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:15:30Z\",\n \"avg_ns\": 4460933992,\n \"stddev_ns\": 6750753,\n \"avg_ts\": 114.774346,\n \"stddev_ts\": 0.173832,\n \"samples_ns\": [ 4453141155, 4464682182, 4464978640 ],\n \"samples_ts\": [ 114.975, 114.678, 114.67 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:15:48Z\",\n \"avg_ns\": 20434866792,\n \"stddev_ns\": 79067402,\n \"avg_ts\": 25.055465,\n \"stddev_ts\": 0.096734,\n \"samples_ns\": [ 20525935322, 20383715915, 20394949139 ],\n \"samples_ts\": [ 24.9441, 25.1181, 25.1043 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:15:30Z", "avg_ns": 4460933992, "stddev_ns": 6750753, "avg_ts": 114.774346, "stddev_ts": 0.173832, "samples_ns": [ 4453141155, 4464682182, 4464978640 ], "samples_ts": [ 114.975, 114.678, 114.67 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:15:48Z", "avg_ns": 20434866792, "stddev_ns": 79067402, "avg_ts": 25.055465, "stddev_ts": 0.096734, "samples_ns": [ 20525935322, 20383715915, 20394949139 ], "samples_ts": [ 24.9441, 25.1181, 25.1043 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 15 }, { "timestamp_utc": "2025-12-08T20:17:10.585724+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:16:50Z\",\n \"avg_ns\": 1104539903,\n \"stddev_ns\": 1489854,\n \"avg_ts\": 115.885496,\n \"stddev_ts\": 0.156253,\n \"samples_ns\": [ 1105981586, 1104630617, 1103007508 ],\n \"samples_ts\": [ 115.734, 115.876, 116.046 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:16:54Z\",\n \"avg_ns\": 5179965667,\n \"stddev_ns\": 338399634,\n \"avg_ts\": 24.778558,\n \"stddev_ts\": 1.560667,\n \"samples_ns\": [ 5570198741, 5002253541, 4967444721 ],\n \"samples_ts\": [ 22.9794, 25.5885, 25.7678 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:16:50Z", "avg_ns": 1104539903, "stddev_ns": 1489854, "avg_ts": 115.885496, "stddev_ts": 0.156253, "samples_ns": [ 1105981586, 1104630617, 1103007508 ], "samples_ts": [ 115.734, 115.876, 116.046 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:16:54Z", "avg_ns": 5179965667, "stddev_ns": 338399634, "avg_ts": 24.778558, "stddev_ts": 1.560667, "samples_ns": [ 5570198741, 5002253541, 4967444721 ], "samples_ts": [ 22.9794, 25.5885, 25.7678 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 16 }, { "timestamp_utc": "2025-12-08T20:18:22.169253+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:17:11Z\",\n \"avg_ns\": 1101695893,\n \"stddev_ns\": 2639438,\n \"avg_ts\": 116.184957,\n \"stddev_ts\": 0.278718,\n \"samples_ns\": [ 1098668859, 1102902108, 1103516712 ],\n \"samples_ts\": [ 116.505, 116.057, 115.993 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:17:15Z\",\n \"avg_ns\": 22166510127,\n \"stddev_ns\": 800992569,\n \"avg_ts\": 23.118242,\n \"stddev_ts\": 0.844248,\n \"samples_ns\": [ 21299876415, 22879645819, 22320008148 ],\n \"samples_ts\": [ 24.0377, 22.378, 22.9391 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:17:11Z", "avg_ns": 1101695893, "stddev_ns": 2639438, "avg_ts": 116.184957, "stddev_ts": 0.278718, "samples_ns": [ 1098668859, 1102902108, 1103516712 ], "samples_ts": [ 116.505, 116.057, 115.993 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:17:15Z", "avg_ns": 22166510127, "stddev_ns": 800992569, "avg_ts": 23.118242, "stddev_ts": 0.844248, "samples_ns": [ 21299876415, 22879645819, 22320008148 ], "samples_ts": [ 24.0377, 22.378, 22.9391 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 17 }, { "timestamp_utc": "2025-12-08T20:18:55.933715+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:18:22Z\",\n \"avg_ns\": 4579015554,\n \"stddev_ns\": 4946767,\n \"avg_ts\": 111.814514,\n \"stddev_ts\": 0.120843,\n \"samples_ns\": [ 4581273232, 4573343752, 4582429680 ],\n \"samples_ts\": [ 111.759, 111.953, 111.731 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:18:41Z\",\n \"avg_ns\": 4928851233,\n \"stddev_ns\": 18073776,\n \"avg_ts\": 25.969772,\n \"stddev_ts\": 0.095037,\n \"samples_ns\": [ 4949604447, 4916569464, 4920379790 ],\n \"samples_ts\": [ 25.8607, 26.0344, 26.0143 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:18:22Z", "avg_ns": 4579015554, "stddev_ns": 4946767, "avg_ts": 111.814514, "stddev_ts": 0.120843, "samples_ns": [ 4581273232, 4573343752, 4582429680 ], "samples_ts": [ 111.759, 111.953, 111.731 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:18:41Z", "avg_ns": 4928851233, "stddev_ns": 18073776, "avg_ts": 25.969772, "stddev_ts": 0.095037, "samples_ns": [ 4949604447, 4916569464, 4920379790 ], "samples_ts": [ 25.8607, 26.0344, 26.0143 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 18 }, { "timestamp_utc": "2025-12-08T20:20:15.249296+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:18:56Z\",\n \"avg_ns\": 4472409237,\n \"stddev_ns\": 17716890,\n \"avg_ts\": 114.480883,\n \"stddev_ts\": 0.453579,\n \"samples_ns\": [ 4489855648, 4472937862, 4454434203 ],\n \"samples_ts\": [ 114.035, 114.466, 114.942 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:19:14Z\",\n \"avg_ns\": 20249860285,\n \"stddev_ns\": 39821615,\n \"avg_ts\": 25.284190,\n \"stddev_ts\": 0.049701,\n \"samples_ns\": [ 20213261250, 20292266731, 20244052874 ],\n \"samples_ts\": [ 25.3299, 25.2313, 25.2914 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:18:56Z", "avg_ns": 4472409237, "stddev_ns": 17716890, "avg_ts": 114.480883, "stddev_ts": 0.453579, "samples_ns": [ 4489855648, 4472937862, 4454434203 ], "samples_ts": [ 114.035, 114.466, 114.942 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:19:14Z", "avg_ns": 20249860285, "stddev_ns": 39821615, "avg_ts": 25.28419, "stddev_ts": 0.049701, "samples_ns": [ 20213261250, 20292266731, 20244052874 ], "samples_ts": [ 25.3299, 25.2313, 25.2914 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 19 }, { "timestamp_utc": "2025-12-08T20:20:35.773016+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:20:15Z\",\n \"avg_ns\": 1102130631,\n \"stddev_ns\": 6152721,\n \"avg_ts\": 116.141092,\n \"stddev_ts\": 0.647159,\n \"samples_ns\": [ 1108882015, 1096839192, 1100670686 ],\n \"samples_ts\": [ 115.432, 116.699, 116.293 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:20:20Z\",\n \"avg_ns\": 5149274508,\n \"stddev_ns\": 337030802,\n \"avg_ts\": 24.926469,\n \"stddev_ts\": 1.572102,\n \"samples_ns\": [ 4951950112, 5538431222, 4957442192 ],\n \"samples_ts\": [ 25.8484, 23.1112, 25.8198 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:20:15Z", "avg_ns": 1102130631, "stddev_ns": 6152721, "avg_ts": 116.141092, "stddev_ts": 0.647159, "samples_ns": [ 1108882015, 1096839192, 1100670686 ], "samples_ts": [ 115.432, 116.699, 116.293 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:20:20Z", "avg_ns": 5149274508, "stddev_ns": 337030802, "avg_ts": 24.926469, "stddev_ts": 1.572102, "samples_ns": [ 4951950112, 5538431222, 4957442192 ], "samples_ts": [ 25.8484, 23.1112, 25.8198 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 20 }, { "timestamp_utc": "2025-12-08T20:21:42.168719+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:20:36Z\",\n \"avg_ns\": 1119817838,\n \"stddev_ns\": 2037030,\n \"avg_ts\": 114.304557,\n \"stddev_ts\": 0.207781,\n \"samples_ns\": [ 1119478379, 1117972419, 1122002718 ],\n \"samples_ts\": [ 114.339, 114.493, 114.082 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:20:40Z\",\n \"avg_ns\": 20414553882,\n \"stddev_ns\": 340529390,\n \"avg_ts\": 25.084757,\n \"stddev_ts\": 0.414648,\n \"samples_ns\": [ 20182763018, 20805522017, 20255376612 ],\n \"samples_ts\": [ 25.3682, 24.6089, 25.2772 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:20:36Z", "avg_ns": 1119817838, "stddev_ns": 2037030, "avg_ts": 114.304557, "stddev_ts": 0.207781, "samples_ns": [ 1119478379, 1117972419, 1122002718 ], "samples_ts": [ 114.339, 114.493, 114.082 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:20:40Z", "avg_ns": 20414553882, "stddev_ns": 340529390, "avg_ts": 25.084757, "stddev_ts": 0.414648, "samples_ns": [ 20182763018, 20805522017, 20255376612 ], "samples_ts": [ 25.3682, 24.6089, 25.2772 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 21 }, { "timestamp_utc": "2025-12-08T20:22:17.042348+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:21:42Z\",\n \"avg_ns\": 4881319842,\n \"stddev_ns\": 316997634,\n \"avg_ts\": 105.174697,\n \"stddev_ts\": 6.583672,\n \"samples_ns\": [ 4692844153, 5247302377, 4703812998 ],\n \"samples_ts\": [ 109.102, 97.5739, 108.848 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:22:02Z\",\n \"avg_ns\": 4944474523,\n \"stddev_ns\": 48501185,\n \"avg_ts\": 25.889137,\n \"stddev_ts\": 0.252907,\n \"samples_ns\": [ 4998744366, 4905362822, 4929316383 ],\n \"samples_ts\": [ 25.6064, 26.0939, 25.9671 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:21:42Z", "avg_ns": 4881319842, "stddev_ns": 316997634, "avg_ts": 105.174697, "stddev_ts": 6.583672, "samples_ns": [ 4692844153, 5247302377, 4703812998 ], "samples_ts": [ 109.102, 97.5739, 108.848 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:22:02Z", "avg_ns": 4944474523, "stddev_ns": 48501185, "avg_ts": 25.889137, "stddev_ts": 0.252907, "samples_ns": [ 4998744366, 4905362822, 4929316383 ], "samples_ts": [ 25.6064, 26.0939, 25.9671 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 22 }, { "timestamp_utc": "2025-12-08T20:23:43.035242+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:22:17Z\",\n \"avg_ns\": 4706107607,\n \"stddev_ns\": 6394865,\n \"avg_ts\": 108.794926,\n \"stddev_ts\": 0.147757,\n \"samples_ns\": [ 4713208483, 4704311444, 4700802894 ],\n \"samples_ts\": [ 108.631, 108.836, 108.918 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:22:36Z\",\n \"avg_ns\": 22166222320,\n \"stddev_ns\": 165358774,\n \"avg_ts\": 23.099068,\n \"stddev_ts\": 0.173008,\n \"samples_ns\": [ 21976892252, 22282313879, 22239460830 ],\n \"samples_ts\": [ 23.2972, 22.9779, 23.0221 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:22:17Z", "avg_ns": 4706107607, "stddev_ns": 6394865, "avg_ts": 108.794926, "stddev_ts": 0.147757, "samples_ns": [ 4713208483, 4704311444, 4700802894 ], "samples_ts": [ 108.631, 108.836, 108.918 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:22:36Z", "avg_ns": 22166222320, "stddev_ns": 165358774, "avg_ts": 23.099068, "stddev_ts": 0.173008, "samples_ns": [ 21976892252, 22282313879, 22239460830 ], "samples_ts": [ 23.2972, 22.9779, 23.0221 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 23 }, { "timestamp_utc": "2025-12-08T20:24:03.635119+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:23:43Z\",\n \"avg_ns\": 1101291814,\n \"stddev_ns\": 1321623,\n \"avg_ts\": 116.227253,\n \"stddev_ts\": 0.139473,\n \"samples_ns\": [ 1102505449, 1099884432, 1101485562 ],\n \"samples_ts\": [ 116.099, 116.376, 116.207 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:23:48Z\",\n \"avg_ns\": 5166143637,\n \"stddev_ns\": 330263379,\n \"avg_ts\": 24.841999,\n \"stddev_ts\": 1.532142,\n \"samples_ns\": [ 4960702985, 5547107568, 4990620358 ],\n \"samples_ts\": [ 25.8028, 23.0751, 25.6481 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:23:43Z", "avg_ns": 1101291814, "stddev_ns": 1321623, "avg_ts": 116.227253, "stddev_ts": 0.139473, "samples_ns": [ 1102505449, 1099884432, 1101485562 ], "samples_ts": [ 116.099, 116.376, 116.207 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:23:48Z", "avg_ns": 5166143637, "stddev_ns": 330263379, "avg_ts": 24.841999, "stddev_ts": 1.532142, "samples_ns": [ 4960702985, 5547107568, 4990620358 ], "samples_ts": [ 25.8028, 23.0751, 25.6481 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 24 }, { "timestamp_utc": "2025-12-08T20:25:12.311068+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:24:04Z\",\n \"avg_ns\": 1099775269,\n \"stddev_ns\": 1011720,\n \"avg_ts\": 116.387480,\n \"stddev_ts\": 0.107068,\n \"samples_ns\": [ 1100367871, 1100350230, 1098607707 ],\n \"samples_ts\": [ 116.325, 116.327, 116.511 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:24:08Z\",\n \"avg_ns\": 21186654955,\n \"stddev_ns\": 855002405,\n \"avg_ts\": 24.192175,\n \"stddev_ts\": 0.967343,\n \"samples_ns\": [ 22104646445, 21042298354, 20413020068 ],\n \"samples_ts\": [ 23.1626, 24.3319, 25.082 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:24:04Z", "avg_ns": 1099775269, "stddev_ns": 1011720, "avg_ts": 116.38748, "stddev_ts": 0.107068, "samples_ns": [ 1100367871, 1100350230, 1098607707 ], "samples_ts": [ 116.325, 116.327, 116.511 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:24:08Z", "avg_ns": 21186654955, "stddev_ns": 855002405, "avg_ts": 24.192175, "stddev_ts": 0.967343, "samples_ns": [ 22104646445, 21042298354, 20413020068 ], "samples_ts": [ 23.1626, 24.3319, 25.082 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 25 }, { "timestamp_utc": "2025-12-08T20:25:45.681680+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:25:12Z\",\n \"avg_ns\": 4448221089,\n \"stddev_ns\": 4024378,\n \"avg_ts\": 115.102255,\n \"stddev_ts\": 0.104176,\n \"samples_ns\": [ 4443704110, 4449534435, 4451424722 ],\n \"samples_ts\": [ 115.219, 115.068, 115.019 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:25:30Z\",\n \"avg_ns\": 4971029571,\n \"stddev_ns\": 14029490,\n \"avg_ts\": 25.749330,\n \"stddev_ts\": 0.072665,\n \"samples_ns\": [ 4985170833, 4970802928, 4957114953 ],\n \"samples_ts\": [ 25.6762, 25.7504, 25.8215 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:25:12Z", "avg_ns": 4448221089, "stddev_ns": 4024378, "avg_ts": 115.102255, "stddev_ts": 0.104176, "samples_ns": [ 4443704110, 4449534435, 4451424722 ], "samples_ts": [ 115.219, 115.068, 115.019 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:25:30Z", "avg_ns": 4971029571, "stddev_ns": 14029490, "avg_ts": 25.74933, "stddev_ts": 0.072665, "samples_ns": [ 4985170833, 4970802928, 4957114953 ], "samples_ts": [ 25.6762, 25.7504, 25.8215 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 26 }, { "timestamp_utc": "2025-12-08T20:27:10.946572+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:25:46Z\",\n \"avg_ns\": 4636817119,\n \"stddev_ns\": 337230205,\n \"avg_ts\": 110.795533,\n \"stddev_ts\": 7.733409,\n \"samples_ns\": [ 4444837259, 4439409697, 5026204402 ],\n \"samples_ts\": [ 115.19, 115.331, 101.866 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:26:04Z\",\n \"avg_ns\": 22064628595,\n \"stddev_ns\": 150235310,\n \"avg_ts\": 23.205280,\n \"stddev_ts\": 0.158570,\n \"samples_ns\": [ 21892913438, 22171842137, 22129130210 ],\n \"samples_ts\": [ 23.3866, 23.0924, 23.1369 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:25:46Z", "avg_ns": 4636817119, "stddev_ns": 337230205, "avg_ts": 110.795533, "stddev_ts": 7.733409, "samples_ns": [ 4444837259, 4439409697, 5026204402 ], "samples_ts": [ 115.19, 115.331, 101.866 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:26:04Z", "avg_ns": 22064628595, "stddev_ns": 150235310, "avg_ts": 23.20528, "stddev_ts": 0.15857, "samples_ns": [ 21892913438, 22171842137, 22129130210 ], "samples_ts": [ 23.3866, 23.0924, 23.1369 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 27 }, { "timestamp_utc": "2025-12-08T20:27:30.854812+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:27:11Z\",\n \"avg_ns\": 1104636585,\n \"stddev_ns\": 4518513,\n \"avg_ts\": 115.876508,\n \"stddev_ts\": 0.474885,\n \"samples_ns\": [ 1108134796, 1099535205, 1106239755 ],\n \"samples_ts\": [ 115.509, 116.413, 115.707 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:27:15Z\",\n \"avg_ns\": 4935855754,\n \"stddev_ns\": 20754296,\n \"avg_ts\": 25.932992,\n \"stddev_ts\": 0.109085,\n \"samples_ns\": [ 4955944837, 4937127243, 4914495184 ],\n \"samples_ts\": [ 25.8276, 25.926, 26.0454 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:27:11Z", "avg_ns": 1104636585, "stddev_ns": 4518513, "avg_ts": 115.876508, "stddev_ts": 0.474885, "samples_ns": [ 1108134796, 1099535205, 1106239755 ], "samples_ts": [ 115.509, 116.413, 115.707 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:27:15Z", "avg_ns": 4935855754, "stddev_ns": 20754296, "avg_ts": 25.932992, "stddev_ts": 0.109085, "samples_ns": [ 4955944837, 4937127243, 4914495184 ], "samples_ts": [ 25.8276, 25.926, 26.0454 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 28 }, { "timestamp_utc": "2025-12-08T20:28:41.790455+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:27:31Z\",\n \"avg_ns\": 1106684147,\n \"stddev_ns\": 808378,\n \"avg_ts\": 115.660865,\n \"stddev_ts\": 0.084504,\n \"samples_ns\": [ 1106866796, 1107385574, 1105800071 ],\n \"samples_ts\": [ 115.642, 115.588, 115.753 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:27:35Z\",\n \"avg_ns\": 21939110123,\n \"stddev_ns\": 594930932,\n \"avg_ts\": 23.348947,\n \"stddev_ts\": 0.643232,\n \"samples_ns\": [ 21252145757, 22284247415, 22280937199 ],\n \"samples_ts\": [ 24.0917, 22.9759, 22.9793 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:27:31Z", "avg_ns": 1106684147, "stddev_ns": 808378, "avg_ts": 115.660865, "stddev_ts": 0.084504, "samples_ns": [ 1106866796, 1107385574, 1105800071 ], "samples_ts": [ 115.642, 115.588, 115.753 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:27:35Z", "avg_ns": 21939110123, "stddev_ns": 594930932, "avg_ts": 23.348947, "stddev_ts": 0.643232, "samples_ns": [ 21252145757, 22284247415, 22280937199 ], "samples_ts": [ 24.0917, 22.9759, 22.9793 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 29 }, { "timestamp_utc": "2025-12-08T20:29:15.302281+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:28:42Z\",\n \"avg_ns\": 4484191379,\n \"stddev_ns\": 4902134,\n \"avg_ts\": 114.178982,\n \"stddev_ts\": 0.124838,\n \"samples_ns\": [ 4488697385, 4478972172, 4484904581 ],\n \"samples_ts\": [ 114.064, 114.312, 114.161 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:29:00Z\",\n \"avg_ns\": 4970309734,\n \"stddev_ns\": 21968630,\n \"avg_ts\": 25.753258,\n \"stddev_ts\": 0.114106,\n \"samples_ns\": [ 4985134354, 4945070934, 4980723916 ],\n \"samples_ts\": [ 25.6763, 25.8844, 25.6991 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:28:42Z", "avg_ns": 4484191379, "stddev_ns": 4902134, "avg_ts": 114.178982, "stddev_ts": 0.124838, "samples_ns": [ 4488697385, 4478972172, 4484904581 ], "samples_ts": [ 114.064, 114.312, 114.161 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:29:00Z", "avg_ns": 4970309734, "stddev_ns": 21968630, "avg_ts": 25.753258, "stddev_ts": 0.114106, "samples_ns": [ 4985134354, 4945070934, 4980723916 ], "samples_ts": [ 25.6763, 25.8844, 25.6991 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 30 }, { "timestamp_utc": "2025-12-08T20:30:34.844130+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:29:15Z\",\n \"avg_ns\": 4480067361,\n \"stddev_ns\": 7656111,\n \"avg_ts\": 114.284219,\n \"stddev_ts\": 0.195465,\n \"samples_ns\": [ 4471350627, 4485700668, 4483150789 ],\n \"samples_ts\": [ 114.507, 114.14, 114.205 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:29:33Z\",\n \"avg_ns\": 20304342910,\n \"stddev_ns\": 101369383,\n \"avg_ts\": 25.216698,\n \"stddev_ts\": 0.125532,\n \"samples_ns\": [ 20245570349, 20421393844, 20246064537 ],\n \"samples_ts\": [ 25.2895, 25.0717, 25.2889 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:29:15Z", "avg_ns": 4480067361, "stddev_ns": 7656111, "avg_ts": 114.284219, "stddev_ts": 0.195465, "samples_ns": [ 4471350627, 4485700668, 4483150789 ], "samples_ts": [ 114.507, 114.14, 114.205 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:29:33Z", "avg_ns": 20304342910, "stddev_ns": 101369383, "avg_ts": 25.216698, "stddev_ts": 0.125532, "samples_ns": [ 20245570349, 20421393844, 20246064537 ], "samples_ts": [ 25.2895, 25.0717, 25.2889 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 31 }, { "timestamp_utc": "2025-12-08T20:30:54.777050+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:30:35Z\",\n \"avg_ns\": 1110667718,\n \"stddev_ns\": 1080090,\n \"avg_ts\": 115.246062,\n \"stddev_ts\": 0.111986,\n \"samples_ns\": [ 1110429757, 1109726975, 1111846423 ],\n \"samples_ts\": [ 115.271, 115.344, 115.124 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:30:39Z\",\n \"avg_ns\": 4939995584,\n \"stddev_ns\": 12942653,\n \"avg_ts\": 25.911073,\n \"stddev_ts\": 0.067849,\n \"samples_ns\": [ 4928023470, 4953727851, 4938235433 ],\n \"samples_ts\": [ 25.9739, 25.8391, 25.9202 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:30:35Z", "avg_ns": 1110667718, "stddev_ns": 1080090, "avg_ts": 115.246062, "stddev_ts": 0.111986, "samples_ns": [ 1110429757, 1109726975, 1111846423 ], "samples_ts": [ 115.271, 115.344, 115.124 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:30:39Z", "avg_ns": 4939995584, "stddev_ns": 12942653, "avg_ts": 25.911073, "stddev_ts": 0.067849, "samples_ns": [ 4928023470, 4953727851, 4938235433 ], "samples_ts": [ 25.9739, 25.8391, 25.9202 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 32 }, { "timestamp_utc": "2025-12-08T20:32:02.273403+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:30:55Z\",\n \"avg_ns\": 1101878650,\n \"stddev_ns\": 4018236,\n \"avg_ts\": 116.166270,\n \"stddev_ts\": 0.422791,\n \"samples_ns\": [ 1106486142, 1099101105, 1100048703 ],\n \"samples_ts\": [ 115.682, 116.459, 116.358 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:30:59Z\",\n \"avg_ns\": 20793183568,\n \"stddev_ns\": 1018223666,\n \"avg_ts\": 24.661797,\n \"stddev_ts\": 1.174501,\n \"samples_ns\": [ 21968779948, 20189283911, 20221486847 ],\n \"samples_ts\": [ 23.3058, 25.36, 25.3196 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:30:55Z", "avg_ns": 1101878650, "stddev_ns": 4018236, "avg_ts": 116.16627, "stddev_ts": 0.422791, "samples_ns": [ 1106486142, 1099101105, 1100048703 ], "samples_ts": [ 115.682, 116.459, 116.358 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:30:59Z", "avg_ns": 20793183568, "stddev_ns": 1018223666, "avg_ts": 24.661797, "stddev_ts": 1.174501, "samples_ns": [ 21968779948, 20189283911, 20221486847 ], "samples_ts": [ 23.3058, 25.36, 25.3196 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 33 }, { "timestamp_utc": "2025-12-08T20:32:36.779186+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:32:02Z\",\n \"avg_ns\": 4708705236,\n \"stddev_ns\": 7757190,\n \"avg_ts\": 108.734971,\n \"stddev_ts\": 0.179234,\n \"samples_ns\": [ 4715226698, 4710761666, 4700127345 ],\n \"samples_ts\": [ 108.584, 108.687, 108.933 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:32:21Z\",\n \"avg_ns\": 4983226252,\n \"stddev_ns\": 21911866,\n \"avg_ts\": 25.686501,\n \"stddev_ts\": 0.112725,\n \"samples_ns\": [ 5007873897, 4975851683, 4965953177 ],\n \"samples_ts\": [ 25.5597, 25.7242, 25.7755 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:32:02Z", "avg_ns": 4708705236, "stddev_ns": 7757190, "avg_ts": 108.734971, "stddev_ts": 0.179234, "samples_ns": [ 4715226698, 4710761666, 4700127345 ], "samples_ts": [ 108.584, 108.687, 108.933 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:32:21Z", "avg_ns": 4983226252, "stddev_ns": 21911866, "avg_ts": 25.686501, "stddev_ts": 0.112725, "samples_ns": [ 5007873897, 4975851683, 4965953177 ], "samples_ts": [ 25.5597, 25.7242, 25.7755 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 34 }, { "timestamp_utc": "2025-12-08T20:33:59.591441+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:32:37Z\",\n \"avg_ns\": 4710023117,\n \"stddev_ns\": 1688910,\n \"avg_ts\": 108.704359,\n \"stddev_ts\": 0.038955,\n \"samples_ns\": [ 4711144020, 4708082307, 4710843025 ],\n \"samples_ts\": [ 108.678, 108.749, 108.685 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:32:56Z\",\n \"avg_ns\": 20893355729,\n \"stddev_ns\": 182501047,\n \"avg_ts\": 24.506638,\n \"stddev_ts\": 0.213001,\n \"samples_ns\": [ 21103828844, 20779041039, 20797197305 ],\n \"samples_ts\": [ 24.261, 24.6402, 24.6187 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:32:37Z", "avg_ns": 4710023117, "stddev_ns": 1688910, "avg_ts": 108.704359, "stddev_ts": 0.038955, "samples_ns": [ 4711144020, 4708082307, 4710843025 ], "samples_ts": [ 108.678, 108.749, 108.685 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:32:56Z", "avg_ns": 20893355729, "stddev_ns": 182501047, "avg_ts": 24.506638, "stddev_ts": 0.213001, "samples_ns": [ 21103828844, 20779041039, 20797197305 ], "samples_ts": [ 24.261, 24.6402, 24.6187 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 35 }, { "timestamp_utc": "2025-12-08T20:34:11.059789+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:34:00Z\",\n \"avg_ns\": 580767350,\n \"stddev_ns\": 2912625,\n \"avg_ts\": 220.401768,\n \"stddev_ts\": 1.107880,\n \"samples_ns\": [ 581813345, 583012322, 577476385 ],\n \"samples_ts\": [ 220.002, 219.549, 221.654 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:34:02Z\",\n \"avg_ns\": 2829500076,\n \"stddev_ns\": 8125641,\n \"avg_ts\": 45.237922,\n \"stddev_ts\": 0.130033,\n \"samples_ns\": [ 2836487937, 2820584178, 2831428115 ],\n \"samples_ts\": [ 45.1262, 45.3807, 45.2069 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:34:00Z", "avg_ns": 580767350, "stddev_ns": 2912625, "avg_ts": 220.401768, "stddev_ts": 1.10788, "samples_ns": [ 581813345, 583012322, 577476385 ], "samples_ts": [ 220.002, 219.549, 221.654 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:34:02Z", "avg_ns": 2829500076, "stddev_ns": 8125641, "avg_ts": 45.237922, "stddev_ts": 0.130033, "samples_ns": [ 2836487937, 2820584178, 2831428115 ], "samples_ts": [ 45.1262, 45.3807, 45.2069 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 36 }, { "timestamp_utc": "2025-12-08T20:34:48.554882+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:34:11Z\",\n \"avg_ns\": 578781264,\n \"stddev_ns\": 1844187,\n \"avg_ts\": 221.155859,\n \"stddev_ts\": 0.705782,\n \"samples_ns\": [ 579491049, 580165101, 576687642 ],\n \"samples_ts\": [ 220.883, 220.627, 221.957 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:34:13Z\",\n \"avg_ns\": 11505129720,\n \"stddev_ns\": 18601575,\n \"avg_ts\": 44.501966,\n \"stddev_ts\": 0.071898,\n \"samples_ns\": [ 11490509580, 11498812769, 11526066812 ],\n \"samples_ts\": [ 44.5585, 44.5263, 44.4211 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:34:11Z", "avg_ns": 578781264, "stddev_ns": 1844187, "avg_ts": 221.155859, "stddev_ts": 0.705782, "samples_ns": [ 579491049, 580165101, 576687642 ], "samples_ts": [ 220.883, 220.627, 221.957 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:34:13Z", "avg_ns": 11505129720, "stddev_ns": 18601575, "avg_ts": 44.501966, "stddev_ts": 0.071898, "samples_ns": [ 11490509580, 11498812769, 11526066812 ], "samples_ts": [ 44.5585, 44.5263, 44.4211 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 37 }, { "timestamp_utc": "2025-12-08T20:35:07.542068+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:34:49Z\",\n \"avg_ns\": 2495113484,\n \"stddev_ns\": 321317579,\n \"avg_ts\": 207.335005,\n \"stddev_ts\": 24.855881,\n \"samples_ns\": [ 2866075263, 2303673927, 2315591262 ],\n \"samples_ts\": [ 178.642, 222.254, 221.11 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:34:58Z\",\n \"avg_ns\": 2839307833,\n \"stddev_ns\": 22263781,\n \"avg_ts\": 45.083260,\n \"stddev_ts\": 0.353867,\n \"samples_ns\": [ 2841177197, 2860577995, 2816168307 ],\n \"samples_ts\": [ 45.0517, 44.7462, 45.4518 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:34:49Z", "avg_ns": 2495113484, "stddev_ns": 321317579, "avg_ts": 207.335005, "stddev_ts": 24.855881, "samples_ns": [ 2866075263, 2303673927, 2315591262 ], "samples_ts": [ 178.642, 222.254, 221.11 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:34:58Z", "avg_ns": 2839307833, "stddev_ns": 22263781, "avg_ts": 45.08326, "stddev_ts": 0.353867, "samples_ns": [ 2841177197, 2860577995, 2816168307 ], "samples_ts": [ 45.0517, 44.7462, 45.4518 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 38 }, { "timestamp_utc": "2025-12-08T20:35:55.234287+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:35:08Z\",\n \"avg_ns\": 2495123882,\n \"stddev_ns\": 326665224,\n \"avg_ts\": 207.403722,\n \"stddev_ts\": 25.245988,\n \"samples_ns\": [ 2303948322, 2872312613, 2309110712 ],\n \"samples_ts\": [ 222.227, 178.254, 221.73 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:35:17Z\",\n \"avg_ns\": 12409318514,\n \"stddev_ns\": 286745338,\n \"avg_ts\": 41.274203,\n \"stddev_ts\": 0.966327,\n \"samples_ns\": [ 12079101620, 12553439679, 12595414245 ],\n \"samples_ts\": [ 42.3873, 40.7856, 40.6497 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:35:08Z", "avg_ns": 2495123882, "stddev_ns": 326665224, "avg_ts": 207.403722, "stddev_ts": 25.245988, "samples_ns": [ 2303948322, 2872312613, 2309110712 ], "samples_ts": [ 222.227, 178.254, 221.73 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:35:17Z", "avg_ns": 12409318514, "stddev_ns": 286745338, "avg_ts": 41.274203, "stddev_ts": 0.966327, "samples_ns": [ 12079101620, 12553439679, 12595414245 ], "samples_ts": [ 42.3873, 40.7856, 40.6497 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 39 }, { "timestamp_utc": "2025-12-08T20:36:06.702084+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:35:55Z\",\n \"avg_ns\": 579298315,\n \"stddev_ns\": 1073220,\n \"avg_ts\": 220.957474,\n \"stddev_ts\": 0.408918,\n \"samples_ns\": [ 578734784, 578624242, 580535919 ],\n \"samples_ts\": [ 221.172, 221.214, 220.486 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:35:58Z\",\n \"avg_ns\": 2826761637,\n \"stddev_ns\": 10793691,\n \"avg_ts\": 45.281939,\n \"stddev_ts\": 0.173282,\n \"samples_ns\": [ 2814298665, 2833055207, 2832931041 ],\n \"samples_ts\": [ 45.482, 45.1809, 45.1829 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:35:55Z", "avg_ns": 579298315, "stddev_ns": 1073220, "avg_ts": 220.957474, "stddev_ts": 0.408918, "samples_ns": [ 578734784, 578624242, 580535919 ], "samples_ts": [ 221.172, 221.214, 220.486 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:35:58Z", "avg_ns": 2826761637, "stddev_ns": 10793691, "avg_ts": 45.281939, "stddev_ts": 0.173282, "samples_ns": [ 2814298665, 2833055207, 2832931041 ], "samples_ts": [ 45.482, 45.1809, 45.1829 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 40 }, { "timestamp_utc": "2025-12-08T20:36:44.329039+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:36:07Z\",\n \"avg_ns\": 575681902,\n \"stddev_ns\": 1393883,\n \"avg_ts\": 222.345881,\n \"stddev_ts\": 0.538306,\n \"samples_ns\": [ 577036850, 574252865, 575755993 ],\n \"samples_ts\": [ 221.823, 222.898, 222.316 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:36:09Z\",\n \"avg_ns\": 11546315546,\n \"stddev_ns\": 33553157,\n \"avg_ts\": 44.343399,\n \"stddev_ts\": 0.128752,\n \"samples_ns\": [ 11516647930, 11582728910, 11539569799 ],\n \"samples_ts\": [ 44.4574, 44.2037, 44.3691 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:36:07Z", "avg_ns": 575681902, "stddev_ns": 1393883, "avg_ts": 222.345881, "stddev_ts": 0.538306, "samples_ns": [ 577036850, 574252865, 575755993 ], "samples_ts": [ 221.823, 222.898, 222.316 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:36:09Z", "avg_ns": 11546315546, "stddev_ns": 33553157, "avg_ts": 44.343399, "stddev_ts": 0.128752, "samples_ns": [ 11516647930, 11582728910, 11539569799 ], "samples_ts": [ 44.4574, 44.2037, 44.3691 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 41 }, { "timestamp_utc": "2025-12-08T20:37:02.682041+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:36:44Z\",\n \"avg_ns\": 2314727097,\n \"stddev_ns\": 8431718,\n \"avg_ts\": 221.194337,\n \"stddev_ts\": 0.804982,\n \"samples_ns\": [ 2307140607, 2323805014, 2313235670 ],\n \"samples_ts\": [ 221.92, 220.328, 221.335 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:36:54Z\",\n \"avg_ns\": 2812376549,\n \"stddev_ns\": 6387873,\n \"avg_ts\": 45.513265,\n \"stddev_ts\": 0.103325,\n \"samples_ns\": [ 2806476069, 2811493989, 2819159590 ],\n \"samples_ts\": [ 45.6088, 45.5274, 45.4036 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:36:44Z", "avg_ns": 2314727097, "stddev_ns": 8431718, "avg_ts": 221.194337, "stddev_ts": 0.804982, "samples_ns": [ 2307140607, 2323805014, 2313235670 ], "samples_ts": [ 221.92, 220.328, 221.335 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:36:54Z", "avg_ns": 2812376549, "stddev_ns": 6387873, "avg_ts": 45.513265, "stddev_ts": 0.103325, "samples_ns": [ 2806476069, 2811493989, 2819159590 ], "samples_ts": [ 45.6088, 45.5274, 45.4036 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 42 }, { "timestamp_utc": "2025-12-08T20:37:48.745690+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:37:03Z\",\n \"avg_ns\": 2318546910,\n \"stddev_ns\": 6334673,\n \"avg_ts\": 220.829065,\n \"stddev_ts\": 0.602760,\n \"samples_ns\": [ 2325528650, 2313166709, 2316945371 ],\n \"samples_ts\": [ 220.165, 221.342, 220.981 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:37:12Z\",\n \"avg_ns\": 12030804541,\n \"stddev_ns\": 94134618,\n \"avg_ts\": 42.559164,\n \"stddev_ts\": 0.334419,\n \"samples_ns\": [ 11922875468, 12073598199, 12095939958 ],\n \"samples_ts\": [ 42.9427, 42.4066, 42.3283 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:37:03Z", "avg_ns": 2318546910, "stddev_ns": 6334673, "avg_ts": 220.829065, "stddev_ts": 0.60276, "samples_ns": [ 2325528650, 2313166709, 2316945371 ], "samples_ts": [ 220.165, 221.342, 220.981 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:37:12Z", "avg_ns": 12030804541, "stddev_ns": 94134618, "avg_ts": 42.559164, "stddev_ts": 0.334419, "samples_ns": [ 11922875468, 12073598199, 12095939958 ], "samples_ts": [ 42.9427, 42.4066, 42.3283 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 43 }, { "timestamp_utc": "2025-12-08T20:38:00.133570+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:37:49Z\",\n \"avg_ns\": 577026138,\n \"stddev_ns\": 1523204,\n \"avg_ts\": 221.828071,\n \"stddev_ts\": 0.586314,\n \"samples_ns\": [ 577953879, 577855896, 575268641 ],\n \"samples_ts\": [ 221.471, 221.509, 222.505 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:37:51Z\",\n \"avg_ns\": 2808192172,\n \"stddev_ns\": 5508978,\n \"avg_ts\": 45.581043,\n \"stddev_ts\": 0.089354,\n \"samples_ns\": [ 2806837229, 2803487346, 2814251942 ],\n \"samples_ts\": [ 45.6029, 45.6574, 45.4828 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:37:49Z", "avg_ns": 577026138, "stddev_ns": 1523204, "avg_ts": 221.828071, "stddev_ts": 0.586314, "samples_ns": [ 577953879, 577855896, 575268641 ], "samples_ts": [ 221.471, 221.509, 222.505 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:37:51Z", "avg_ns": 2808192172, "stddev_ns": 5508978, "avg_ts": 45.581043, "stddev_ts": 0.089354, "samples_ns": [ 2806837229, 2803487346, 2814251942 ], "samples_ts": [ 45.6029, 45.6574, 45.4828 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 44 }, { "timestamp_utc": "2025-12-08T20:38:38.847070+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:38:01Z\",\n \"avg_ns\": 580898072,\n \"stddev_ns\": 1070207,\n \"avg_ts\": 220.348966,\n \"stddev_ts\": 0.405617,\n \"samples_ns\": [ 582103427, 580531367, 580059422 ],\n \"samples_ts\": [ 219.892, 220.488, 220.667 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:38:03Z\",\n \"avg_ns\": 11698621994,\n \"stddev_ns\": 354392332,\n \"avg_ts\": 43.792174,\n \"stddev_ts\": 1.304017,\n \"samples_ns\": [ 11509644182, 11478771171, 12107450629 ],\n \"samples_ts\": [ 44.4844, 44.6041, 42.288 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:38:01Z", "avg_ns": 580898072, "stddev_ns": 1070207, "avg_ts": 220.348966, "stddev_ts": 0.405617, "samples_ns": [ 582103427, 580531367, 580059422 ], "samples_ts": [ 219.892, 220.488, 220.667 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:38:03Z", "avg_ns": 11698621994, "stddev_ns": 354392332, "avg_ts": 43.792174, "stddev_ts": 1.304017, "samples_ns": [ 11509644182, 11478771171, 12107450629 ], "samples_ts": [ 44.4844, 44.6041, 42.288 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 45 }, { "timestamp_utc": "2025-12-08T20:38:57.637985+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:38:39Z\",\n \"avg_ns\": 2434655743,\n \"stddev_ns\": 3756136,\n \"avg_ts\": 210.297005,\n \"stddev_ts\": 0.324201,\n \"samples_ns\": [ 2431801564, 2433254616, 2438911049 ],\n \"samples_ts\": [ 210.543, 210.418, 209.93 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:38:49Z\",\n \"avg_ns\": 2795762348,\n \"stddev_ns\": 3319000,\n \"avg_ts\": 45.783620,\n \"stddev_ts\": 0.054376,\n \"samples_ns\": [ 2797004575, 2792001868, 2798280602 ],\n \"samples_ts\": [ 45.7632, 45.8452, 45.7424 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:38:39Z", "avg_ns": 2434655743, "stddev_ns": 3756136, "avg_ts": 210.297005, "stddev_ts": 0.324201, "samples_ns": [ 2431801564, 2433254616, 2438911049 ], "samples_ts": [ 210.543, 210.418, 209.93 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:38:49Z", "avg_ns": 2795762348, "stddev_ns": 3319000, "avg_ts": 45.78362, "stddev_ts": 0.054376, "samples_ns": [ 2797004575, 2792001868, 2798280602 ], "samples_ts": [ 45.7632, 45.8452, 45.7424 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 46 }, { "timestamp_utc": "2025-12-08T20:39:43.051267+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:38:58Z\",\n \"avg_ns\": 2450131270,\n \"stddev_ns\": 5418226,\n \"avg_ts\": 208.969076,\n \"stddev_ts\": 0.461508,\n \"samples_ns\": [ 2447501652, 2456361984, 2446530176 ],\n \"samples_ts\": [ 209.193, 208.438, 209.276 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:39:08Z\",\n \"avg_ns\": 11644079200,\n \"stddev_ns\": 343732728,\n \"avg_ts\": 43.995986,\n \"stddev_ts\": 1.277523,\n \"samples_ns\": [ 11470962645, 12039951529, 11421323426 ],\n \"samples_ts\": [ 44.6344, 42.5251, 44.8284 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:38:58Z", "avg_ns": 2450131270, "stddev_ns": 5418226, "avg_ts": 208.969076, "stddev_ts": 0.461508, "samples_ns": [ 2447501652, 2456361984, 2446530176 ], "samples_ts": [ 209.193, 208.438, 209.276 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:39:08Z", "avg_ns": 11644079200, "stddev_ns": 343732728, "avg_ts": 43.995986, "stddev_ts": 1.277523, "samples_ns": [ 11470962645, 12039951529, 11421323426 ], "samples_ts": [ 44.6344, 42.5251, 44.8284 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 47 }, { "timestamp_utc": "2025-12-08T20:39:54.452093+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:39:43Z\",\n \"avg_ns\": 574078281,\n \"stddev_ns\": 4711970,\n \"avg_ts\": 222.976088,\n \"stddev_ts\": 1.823903,\n \"samples_ns\": [ 579343625, 570258751, 572632469 ],\n \"samples_ts\": [ 220.94, 224.46, 223.529 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:39:45Z\",\n \"avg_ns\": 2812472049,\n \"stddev_ns\": 24740222,\n \"avg_ts\": 45.513900,\n \"stddev_ts\": 0.398370,\n \"samples_ns\": [ 2799520355, 2796896457, 2840999336 ],\n \"samples_ts\": [ 45.7221, 45.765, 45.0546 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:39:43Z", "avg_ns": 574078281, "stddev_ns": 4711970, "avg_ts": 222.976088, "stddev_ts": 1.823903, "samples_ns": [ 579343625, 570258751, 572632469 ], "samples_ts": [ 220.94, 224.46, 223.529 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:39:45Z", "avg_ns": 2812472049, "stddev_ns": 24740222, "avg_ts": 45.5139, "stddev_ts": 0.39837, "samples_ns": [ 2799520355, 2796896457, 2840999336 ], "samples_ts": [ 45.7221, 45.765, 45.0546 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 48 }, { "timestamp_utc": "2025-12-08T20:40:32.473715+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:39:55Z\",\n \"avg_ns\": 574382451,\n \"stddev_ns\": 1084571,\n \"avg_ts\": 222.848563,\n \"stddev_ts\": 0.420232,\n \"samples_ns\": [ 575633970, 573725117, 573788267 ],\n \"samples_ts\": [ 222.364, 223.103, 223.079 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:39:57Z\",\n \"avg_ns\": 11500321390,\n \"stddev_ns\": 21943769,\n \"avg_ts\": 44.520603,\n \"stddev_ts\": 0.085039,\n \"samples_ns\": [ 11514817319, 11475075688, 11511071164 ],\n \"samples_ts\": [ 44.4644, 44.6184, 44.4789 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:39:55Z", "avg_ns": 574382451, "stddev_ns": 1084571, "avg_ts": 222.848563, "stddev_ts": 0.420232, "samples_ns": [ 575633970, 573725117, 573788267 ], "samples_ts": [ 222.364, 223.103, 223.079 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:39:57Z", "avg_ns": 11500321390, "stddev_ns": 21943769, "avg_ts": 44.520603, "stddev_ts": 0.085039, "samples_ns": [ 11514817319, 11475075688, 11511071164 ], "samples_ts": [ 44.4644, 44.6184, 44.4789 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 49 }, { "timestamp_utc": "2025-12-08T20:40:51.446185+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:40:33Z\",\n \"avg_ns\": 2309078230,\n \"stddev_ns\": 2894093,\n \"avg_ts\": 221.733733,\n \"stddev_ts\": 0.277688,\n \"samples_ns\": [ 2307029417, 2307816759, 2312388515 ],\n \"samples_ts\": [ 221.93, 221.855, 221.416 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:40:42Z\",\n \"avg_ns\": 3025721130,\n \"stddev_ns\": 359084082,\n \"avg_ts\": 42.679005,\n \"stddev_ts\": 4.740253,\n \"samples_ns\": [ 2817134518, 2819675755, 3440353117 ],\n \"samples_ts\": [ 45.4362, 45.3953, 37.2055 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:40:33Z", "avg_ns": 2309078230, "stddev_ns": 2894093, "avg_ts": 221.733733, "stddev_ts": 0.277688, "samples_ns": [ 2307029417, 2307816759, 2312388515 ], "samples_ts": [ 221.93, 221.855, 221.416 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:40:42Z", "avg_ns": 3025721130, "stddev_ns": 359084082, "avg_ts": 42.679005, "stddev_ts": 4.740253, "samples_ns": [ 2817134518, 2819675755, 3440353117 ], "samples_ts": [ 45.4362, 45.3953, 37.2055 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 50 }, { "timestamp_utc": "2025-12-08T20:41:36.536982+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:40:52Z\",\n \"avg_ns\": 2306956797,\n \"stddev_ns\": 18431768,\n \"avg_ts\": 221.946806,\n \"stddev_ts\": 1.765361,\n \"samples_ns\": [ 2297812534, 2294885173, 2328172685 ],\n \"samples_ts\": [ 222.821, 223.105, 219.915 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:41:01Z\",\n \"avg_ns\": 11722638475,\n \"stddev_ns\": 389637732,\n \"avg_ts\": 43.707763,\n \"stddev_ts\": 1.425581,\n \"samples_ns\": [ 12172247366, 11483466576, 11512201485 ],\n \"samples_ts\": [ 42.0629, 44.5858, 44.4746 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:40:52Z", "avg_ns": 2306956797, "stddev_ns": 18431768, "avg_ts": 221.946806, "stddev_ts": 1.765361, "samples_ns": [ 2297812534, 2294885173, 2328172685 ], "samples_ts": [ 222.821, 223.105, 219.915 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:41:01Z", "avg_ns": 11722638475, "stddev_ns": 389637732, "avg_ts": 43.707763, "stddev_ts": 1.425581, "samples_ns": [ 12172247366, 11483466576, 11512201485 ], "samples_ts": [ 42.0629, 44.5858, 44.4746 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 51 }, { "timestamp_utc": "2025-12-08T20:41:47.875923+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:41:37Z\",\n \"avg_ns\": 574911077,\n \"stddev_ns\": 774221,\n \"avg_ts\": 222.643396,\n \"stddev_ts\": 0.299598,\n \"samples_ns\": [ 574432583, 575804313, 574496335 ],\n \"samples_ts\": [ 222.829, 222.298, 222.804 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:41:39Z\",\n \"avg_ns\": 2797233224,\n \"stddev_ns\": 12142559,\n \"avg_ts\": 45.760076,\n \"stddev_ts\": 0.198201,\n \"samples_ns\": [ 2792260845, 2788366073, 2811072754 ],\n \"samples_ts\": [ 45.841, 45.905, 45.5342 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:41:37Z", "avg_ns": 574911077, "stddev_ns": 774221, "avg_ts": 222.643396, "stddev_ts": 0.299598, "samples_ns": [ 574432583, 575804313, 574496335 ], "samples_ts": [ 222.829, 222.298, 222.804 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:41:39Z", "avg_ns": 2797233224, "stddev_ns": 12142559, "avg_ts": 45.760076, "stddev_ts": 0.198201, "samples_ns": [ 2792260845, 2788366073, 2811072754 ], "samples_ts": [ 45.841, 45.905, 45.5342 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 52 }, { "timestamp_utc": "2025-12-08T20:42:26.047329+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:41:48Z\",\n \"avg_ns\": 581809055,\n \"stddev_ns\": 1008654,\n \"avg_ts\": 220.003892,\n \"stddev_ts\": 0.381573,\n \"samples_ns\": [ 580683707, 582630565, 582112894 ],\n \"samples_ts\": [ 220.43, 219.693, 219.889 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:41:50Z\",\n \"avg_ns\": 11728586943,\n \"stddev_ns\": 356092953,\n \"avg_ts\": 43.680418,\n \"stddev_ts\": 1.304083,\n \"samples_ns\": [ 11553502440, 12138326733, 11493931656 ],\n \"samples_ts\": [ 44.3156, 42.1804, 44.5452 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:41:48Z", "avg_ns": 581809055, "stddev_ns": 1008654, "avg_ts": 220.003892, "stddev_ts": 0.381573, "samples_ns": [ 580683707, 582630565, 582112894 ], "samples_ts": [ 220.43, 219.693, 219.889 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:41:50Z", "avg_ns": 11728586943, "stddev_ns": 356092953, "avg_ts": 43.680418, "stddev_ts": 1.304083, "samples_ns": [ 11553502440, 12138326733, 11493931656 ], "samples_ts": [ 44.3156, 42.1804, 44.5452 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 53 }, { "timestamp_utc": "2025-12-08T20:42:45.152129+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:42:26Z\",\n \"avg_ns\": 2319984563,\n \"stddev_ns\": 4780435,\n \"avg_ts\": 220.691748,\n \"stddev_ts\": 0.454955,\n \"samples_ns\": [ 2314819557, 2320881850, 2324252284 ],\n \"samples_ts\": [ 221.184, 220.606, 220.286 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:42:35Z\",\n \"avg_ns\": 3053198896,\n \"stddev_ns\": 17575620,\n \"avg_ts\": 41.924167,\n \"stddev_ts\": 0.240686,\n \"samples_ns\": [ 3039641571, 3046899005, 3073056113 ],\n \"samples_ts\": [ 42.1102, 42.0099, 41.6523 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:42:26Z", "avg_ns": 2319984563, "stddev_ns": 4780435, "avg_ts": 220.691748, "stddev_ts": 0.454955, "samples_ns": [ 2314819557, 2320881850, 2324252284 ], "samples_ts": [ 221.184, 220.606, 220.286 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:42:35Z", "avg_ns": 3053198896, "stddev_ns": 17575620, "avg_ts": 41.924167, "stddev_ts": 0.240686, "samples_ns": [ 3039641571, 3046899005, 3073056113 ], "samples_ts": [ 42.1102, 42.0099, 41.6523 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 54 }, { "timestamp_utc": "2025-12-08T20:43:32.586467+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:42:45Z\",\n \"avg_ns\": 2327831447,\n \"stddev_ns\": 2538417,\n \"avg_ts\": 219.947370,\n \"stddev_ts\": 0.239737,\n \"samples_ns\": [ 2330665010, 2327064092, 2325765239 ],\n \"samples_ts\": [ 219.68, 220.02, 220.143 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:42:55Z\",\n \"avg_ns\": 12481148335,\n \"stddev_ns\": 530681227,\n \"avg_ts\": 41.070956,\n \"stddev_ts\": 1.732281,\n \"samples_ns\": [ 11992199896, 12405748152, 13045496957 ],\n \"samples_ts\": [ 42.6944, 41.2712, 39.2473 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:42:45Z", "avg_ns": 2327831447, "stddev_ns": 2538417, "avg_ts": 219.94737, "stddev_ts": 0.239737, "samples_ns": [ 2330665010, 2327064092, 2325765239 ], "samples_ts": [ 219.68, 220.02, 220.143 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:42:55Z", "avg_ns": 12481148335, "stddev_ns": 530681227, "avg_ts": 41.070956, "stddev_ts": 1.732281, "samples_ns": [ 11992199896, 12405748152, 13045496957 ], "samples_ts": [ 42.6944, 41.2712, 39.2473 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 55 }, { "timestamp_utc": "2025-12-08T20:43:44.594549+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:43:33Z\",\n \"avg_ns\": 574684854,\n \"stddev_ns\": 2173142,\n \"avg_ts\": 222.732895,\n \"stddev_ts\": 0.843188,\n \"samples_ns\": [ 576586156, 575152217, 572316190 ],\n \"samples_ts\": [ 221.996, 222.55, 223.653 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:43:35Z\",\n \"avg_ns\": 3010816385,\n \"stddev_ns\": 346673238,\n \"avg_ts\": 42.868726,\n \"stddev_ts\": 4.629177,\n \"samples_ns\": [ 3411014200, 2818693102, 2802741855 ],\n \"samples_ts\": [ 37.5255, 45.4111, 45.6696 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:43:33Z", "avg_ns": 574684854, "stddev_ns": 2173142, "avg_ts": 222.732895, "stddev_ts": 0.843188, "samples_ns": [ 576586156, 575152217, 572316190 ], "samples_ts": [ 221.996, 222.55, 223.653 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:43:35Z", "avg_ns": 3010816385, "stddev_ns": 346673238, "avg_ts": 42.868726, "stddev_ts": 4.629177, "samples_ns": [ 3411014200, 2818693102, 2802741855 ], "samples_ts": [ 37.5255, 45.4111, 45.6696 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 56 }, { "timestamp_utc": "2025-12-08T20:44:22.918089+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:43:45Z\",\n \"avg_ns\": 577242890,\n \"stddev_ns\": 1110237,\n \"avg_ts\": 221.744292,\n \"stddev_ts\": 0.426897,\n \"samples_ns\": [ 577673151, 575981918, 578073601 ],\n \"samples_ts\": [ 221.579, 222.229, 221.425 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:43:47Z\",\n \"avg_ns\": 11778182808,\n \"stddev_ns\": 512397093,\n \"avg_ts\": 43.523777,\n \"stddev_ts\": 1.847199,\n \"samples_ns\": [ 11469638282, 12369663220, 11495246924 ],\n \"samples_ts\": [ 44.6396, 41.3916, 44.5401 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:43:45Z", "avg_ns": 577242890, "stddev_ns": 1110237, "avg_ts": 221.744292, "stddev_ts": 0.426897, "samples_ns": [ 577673151, 575981918, 578073601 ], "samples_ts": [ 221.579, 222.229, 221.425 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:43:47Z", "avg_ns": 11778182808, "stddev_ns": 512397093, "avg_ts": 43.523777, "stddev_ts": 1.847199, "samples_ns": [ 11469638282, 12369663220, 11495246924 ], "samples_ts": [ 44.6396, 41.3916, 44.5401 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 57 }, { "timestamp_utc": "2025-12-08T20:44:41.804635+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:44:23Z\",\n \"avg_ns\": 2445445614,\n \"stddev_ns\": 5331366,\n \"avg_ts\": 209.369458,\n \"stddev_ts\": 0.456733,\n \"samples_ns\": [ 2446488672, 2439669802, 2450178368 ],\n \"samples_ts\": [ 209.28, 209.864, 208.964 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:44:33Z\",\n \"avg_ns\": 2800146562,\n \"stddev_ns\": 6704877,\n \"avg_ts\": 45.712068,\n \"stddev_ts\": 0.109509,\n \"samples_ns\": [ 2801172314, 2792988338, 2806279036 ],\n \"samples_ts\": [ 45.6952, 45.829, 45.612 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:44:23Z", "avg_ns": 2445445614, "stddev_ns": 5331366, "avg_ts": 209.369458, "stddev_ts": 0.456733, "samples_ns": [ 2446488672, 2439669802, 2450178368 ], "samples_ts": [ 209.28, 209.864, 208.964 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:44:33Z", "avg_ns": 2800146562, "stddev_ns": 6704877, "avg_ts": 45.712068, "stddev_ts": 0.109509, "samples_ns": [ 2801172314, 2792988338, 2806279036 ], "samples_ts": [ 45.6952, 45.829, 45.612 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 58 }, { "timestamp_utc": "2025-12-08T20:45:30.042080+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:44:42Z\",\n \"avg_ns\": 2444604195,\n \"stddev_ns\": 9017875,\n \"avg_ts\": 209.442760,\n \"stddev_ts\": 0.773439,\n \"samples_ns\": [ 2446413768, 2434818746, 2452580071 ],\n \"samples_ts\": [ 209.286, 210.283, 208.76 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:44:52Z\",\n \"avg_ns\": 12595189375,\n \"stddev_ns\": 295210400,\n \"avg_ts\": 40.665175,\n \"stddev_ts\": 0.943007,\n \"samples_ns\": [ 12491449583, 12365851680, 12928266863 ],\n \"samples_ts\": [ 40.988, 41.4043, 39.6031 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:44:42Z", "avg_ns": 2444604195, "stddev_ns": 9017875, "avg_ts": 209.44276, "stddev_ts": 0.773439, "samples_ns": [ 2446413768, 2434818746, 2452580071 ], "samples_ts": [ 209.286, 210.283, 208.76 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:44:52Z", "avg_ns": 12595189375, "stddev_ns": 295210400, "avg_ts": 40.665175, "stddev_ts": 0.943007, "samples_ns": [ 12491449583, 12365851680, 12928266863 ], "samples_ts": [ 40.988, 41.4043, 39.6031 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 59 }, { "timestamp_utc": "2025-12-08T20:45:42.050605+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:45:30Z\",\n \"avg_ns\": 577283298,\n \"stddev_ns\": 3080407,\n \"avg_ts\": 221.732426,\n \"stddev_ts\": 1.181631,\n \"samples_ns\": [ 580581479, 576787143, 574481274 ],\n \"samples_ts\": [ 220.469, 221.919, 222.81 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:45:32Z\",\n \"avg_ns\": 3010717208,\n \"stddev_ns\": 372379337,\n \"avg_ts\": 42.923368,\n \"stddev_ts\": 4.955118,\n \"samples_ns\": [ 2795687544, 2795760253, 3440703827 ],\n \"samples_ts\": [ 45.7848, 45.7836, 37.2017 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:45:30Z", "avg_ns": 577283298, "stddev_ns": 3080407, "avg_ts": 221.732426, "stddev_ts": 1.181631, "samples_ns": [ 580581479, 576787143, 574481274 ], "samples_ts": [ 220.469, 221.919, 222.81 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:45:32Z", "avg_ns": 3010717208, "stddev_ns": 372379337, "avg_ts": 42.923368, "stddev_ts": 4.955118, "samples_ns": [ 2795687544, 2795760253, 3440703827 ], "samples_ts": [ 45.7848, 45.7836, 37.2017 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 60 }, { "timestamp_utc": "2025-12-08T20:46:19.746452+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:45:42Z\",\n \"avg_ns\": 576218892,\n \"stddev_ns\": 244991,\n \"avg_ts\": 222.137832,\n \"stddev_ts\": 0.094465,\n \"samples_ns\": [ 576412717, 575943528, 576300431 ],\n \"samples_ts\": [ 222.063, 222.244, 222.106 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:45:44Z\",\n \"avg_ns\": 11573463981,\n \"stddev_ns\": 50717554,\n \"avg_ts\": 44.239700,\n \"stddev_ts\": 0.194334,\n \"samples_ns\": [ 11597073058, 11608073115, 11515245770 ],\n \"samples_ts\": [ 44.1491, 44.1072, 44.4628 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:45:42Z", "avg_ns": 576218892, "stddev_ns": 244991, "avg_ts": 222.137832, "stddev_ts": 0.094465, "samples_ns": [ 576412717, 575943528, 576300431 ], "samples_ts": [ 222.063, 222.244, 222.106 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:45:44Z", "avg_ns": 11573463981, "stddev_ns": 50717554, "avg_ts": 44.2397, "stddev_ts": 0.194334, "samples_ns": [ 11597073058, 11608073115, 11515245770 ], "samples_ts": [ 44.1491, 44.1072, 44.4628 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 61 }, { "timestamp_utc": "2025-12-08T20:46:38.131095+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:46:20Z\",\n \"avg_ns\": 2315897818,\n \"stddev_ns\": 11956558,\n \"avg_ts\": 221.084505,\n \"stddev_ts\": 1.144390,\n \"samples_ns\": [ 2320652794, 2324745389, 2302295271 ],\n \"samples_ts\": [ 220.628, 220.239, 222.387 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:46:29Z\",\n \"avg_ns\": 2815069039,\n \"stddev_ns\": 961272,\n \"avg_ts\": 45.469581,\n \"stddev_ts\": 0.015506,\n \"samples_ns\": [ 2813961085, 2815599404, 2815646629 ],\n \"samples_ts\": [ 45.4875, 45.461, 45.4603 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:46:20Z", "avg_ns": 2315897818, "stddev_ns": 11956558, "avg_ts": 221.084505, "stddev_ts": 1.14439, "samples_ns": [ 2320652794, 2324745389, 2302295271 ], "samples_ts": [ 220.628, 220.239, 222.387 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:46:29Z", "avg_ns": 2815069039, "stddev_ns": 961272, "avg_ts": 45.469581, "stddev_ts": 0.015506, "samples_ns": [ 2813961085, 2815599404, 2815646629 ], "samples_ts": [ 45.4875, 45.461, 45.4603 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 62 }, { "timestamp_utc": "2025-12-08T20:47:23.079311+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:46:38Z\",\n \"avg_ns\": 2493109732,\n \"stddev_ns\": 329162382,\n \"avg_ts\": 207.607782,\n \"stddev_ts\": 25.469140,\n \"samples_ns\": [ 2301190453, 2304951243, 2873187502 ],\n \"samples_ts\": [ 222.494, 222.131, 178.199 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:46:48Z\",\n \"avg_ns\": 11493996197,\n \"stddev_ns\": 57881233,\n \"avg_ts\": 44.545747,\n \"stddev_ts\": 0.224012,\n \"samples_ns\": [ 11442464073, 11556620851, 11482903668 ],\n \"samples_ts\": [ 44.7456, 44.3036, 44.588 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:46:38Z", "avg_ns": 2493109732, "stddev_ns": 329162382, "avg_ts": 207.607782, "stddev_ts": 25.46914, "samples_ns": [ 2301190453, 2304951243, 2873187502 ], "samples_ts": [ 222.494, 222.131, 178.199 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:46:48Z", "avg_ns": 11493996197, "stddev_ns": 57881233, "avg_ts": 44.545747, "stddev_ts": 0.224012, "samples_ns": [ 11442464073, 11556620851, 11482903668 ], "samples_ts": [ 44.7456, 44.3036, 44.588 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 63 }, { "timestamp_utc": "2025-12-08T20:47:34.446734+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:47:23Z\",\n \"avg_ns\": 577192485,\n \"stddev_ns\": 1301344,\n \"avg_ts\": 221.763861,\n \"stddev_ts\": 0.500038,\n \"samples_ns\": [ 575873939, 577227598, 578475918 ],\n \"samples_ts\": [ 222.271, 221.75, 221.271 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:47:25Z\",\n \"avg_ns\": 2797604861,\n \"stddev_ns\": 9714213,\n \"avg_ts\": 45.753792,\n \"stddev_ts\": 0.159083,\n \"samples_ns\": [ 2786835347, 2800273166, 2805706070 ],\n \"samples_ts\": [ 45.9302, 45.7098, 45.6213 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:47:23Z", "avg_ns": 577192485, "stddev_ns": 1301344, "avg_ts": 221.763861, "stddev_ts": 0.500038, "samples_ns": [ 575873939, 577227598, 578475918 ], "samples_ts": [ 222.271, 221.75, 221.271 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:47:25Z", "avg_ns": 2797604861, "stddev_ns": 9714213, "avg_ts": 45.753792, "stddev_ts": 0.159083, "samples_ns": [ 2786835347, 2800273166, 2805706070 ], "samples_ts": [ 45.9302, 45.7098, 45.6213 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 64 }, { "timestamp_utc": "2025-12-08T20:48:15.094791+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:47:35Z\",\n \"avg_ns\": 576542221,\n \"stddev_ns\": 1612437,\n \"avg_ts\": 222.014385,\n \"stddev_ts\": 0.620103,\n \"samples_ns\": [ 575962407, 578364387, 575299869 ],\n \"samples_ts\": [ 222.237, 221.314, 222.493 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:47:37Z\",\n \"avg_ns\": 12559838641,\n \"stddev_ns\": 595604802,\n \"avg_ts\": 40.825778,\n \"stddev_ts\": 1.927784,\n \"samples_ns\": [ 11988088843, 12514697386, 13176729695 ],\n \"samples_ts\": [ 42.7091, 40.9119, 38.8564 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:47:35Z", "avg_ns": 576542221, "stddev_ns": 1612437, "avg_ts": 222.014385, "stddev_ts": 0.620103, "samples_ns": [ 575962407, 578364387, 575299869 ], "samples_ts": [ 222.237, 221.314, 222.493 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:47:37Z", "avg_ns": 12559838641, "stddev_ns": 595604802, "avg_ts": 40.825778, "stddev_ts": 1.927784, "samples_ns": [ 11988088843, 12514697386, 13176729695 ], "samples_ts": [ 42.7091, 40.9119, 38.8564 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 65 }, { "timestamp_utc": "2025-12-08T20:48:34.328136+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:48:15Z\",\n \"avg_ns\": 2352792412,\n \"stddev_ns\": 7211421,\n \"avg_ts\": 217.615124,\n \"stddev_ts\": 0.668051,\n \"samples_ns\": [ 2344547061, 2357920337, 2355909840 ],\n \"samples_ts\": [ 218.379, 217.14, 217.326 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:48:25Z\",\n \"avg_ns\": 3045824895,\n \"stddev_ns\": 17121234,\n \"avg_ts\": 42.025624,\n \"stddev_ts\": 0.235894,\n \"samples_ns\": [ 3042778653, 3030431340, 3064264693 ],\n \"samples_ts\": [ 42.0668, 42.2382, 41.7718 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:48:15Z", "avg_ns": 2352792412, "stddev_ns": 7211421, "avg_ts": 217.615124, "stddev_ts": 0.668051, "samples_ns": [ 2344547061, 2357920337, 2355909840 ], "samples_ts": [ 218.379, 217.14, 217.326 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:48:25Z", "avg_ns": 3045824895, "stddev_ns": 17121234, "avg_ts": 42.025624, "stddev_ts": 0.235894, "samples_ns": [ 3042778653, 3030431340, 3064264693 ], "samples_ts": [ 42.0668, 42.2382, 41.7718 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 66 }, { "timestamp_utc": "2025-12-08T20:49:20.053725+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:48:34Z\",\n \"avg_ns\": 2519763154,\n \"stddev_ns\": 347982398,\n \"avg_ts\": 205.616121,\n \"stddev_ts\": 26.312423,\n \"samples_ns\": [ 2921335448, 2331081529, 2306872485 ],\n \"samples_ts\": [ 175.262, 219.641, 221.946 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:48:44Z\",\n \"avg_ns\": 11724050159,\n \"stddev_ns\": 377929624,\n \"avg_ts\": 43.700648,\n \"stddev_ts\": 1.383546,\n \"samples_ns\": [ 11532554627, 12159397538, 11480198312 ],\n \"samples_ts\": [ 44.3961, 42.1073, 44.5985 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:48:34Z", "avg_ns": 2519763154, "stddev_ns": 347982398, "avg_ts": 205.616121, "stddev_ts": 26.312423, "samples_ns": [ 2921335448, 2331081529, 2306872485 ], "samples_ts": [ 175.262, 219.641, 221.946 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:48:44Z", "avg_ns": 11724050159, "stddev_ns": 377929624, "avg_ts": 43.700648, "stddev_ts": 1.383546, "samples_ns": [ 11532554627, 12159397538, 11480198312 ], "samples_ts": [ 44.3961, 42.1073, 44.5985 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 67 }, { "timestamp_utc": "2025-12-08T20:49:32.072868+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:49:20Z\",\n \"avg_ns\": 772760189,\n \"stddev_ns\": 330591825,\n \"avg_ts\": 183.605277,\n \"stddev_ts\": 62.991350,\n \"samples_ns\": [ 583177232, 580611465, 1154491872 ],\n \"samples_ts\": [ 219.487, 220.457, 110.871 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:49:23Z\",\n \"avg_ns\": 2812751928,\n \"stddev_ns\": 9314500,\n \"avg_ts\": 45.507368,\n \"stddev_ts\": 0.150959,\n \"samples_ns\": [ 2816829715, 2819331796, 2802094275 ],\n \"samples_ts\": [ 45.4412, 45.4008, 45.6801 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:49:20Z", "avg_ns": 772760189, "stddev_ns": 330591825, "avg_ts": 183.605277, "stddev_ts": 62.99135, "samples_ns": [ 583177232, 580611465, 1154491872 ], "samples_ts": [ 219.487, 220.457, 110.871 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:49:23Z", "avg_ns": 2812751928, "stddev_ns": 9314500, "avg_ts": 45.507368, "stddev_ts": 0.150959, "samples_ns": [ 2816829715, 2819331796, 2802094275 ], "samples_ts": [ 45.4412, 45.4008, 45.6801 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 68 }, { "timestamp_utc": "2025-12-08T20:50:09.542464+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:49:32Z\",\n \"avg_ns\": 577353556,\n \"stddev_ns\": 939982,\n \"avg_ts\": 221.701632,\n \"stddev_ts\": 0.360817,\n \"samples_ns\": [ 578232256, 577464959, 576363455 ],\n \"samples_ts\": [ 221.364, 221.658, 222.082 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:49:34Z\",\n \"avg_ns\": 11497163227,\n \"stddev_ns\": 19544097,\n \"avg_ts\": 44.532810,\n \"stddev_ts\": 0.075645,\n \"samples_ns\": [ 11491003737, 11481441530, 11519044416 ],\n \"samples_ts\": [ 44.5566, 44.5937, 44.4481 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:49:32Z", "avg_ns": 577353556, "stddev_ns": 939982, "avg_ts": 221.701632, "stddev_ts": 0.360817, "samples_ns": [ 578232256, 577464959, 576363455 ], "samples_ts": [ 221.364, 221.658, 222.082 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:49:34Z", "avg_ns": 11497163227, "stddev_ns": 19544097, "avg_ts": 44.53281, "stddev_ts": 0.075645, "samples_ns": [ 11491003737, 11481441530, 11519044416 ], "samples_ts": [ 44.5566, 44.5937, 44.4481 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 69 }, { "timestamp_utc": "2025-12-08T20:50:28.428058+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:50:10Z\",\n \"avg_ns\": 2447581001,\n \"stddev_ns\": 13402510,\n \"avg_ts\": 209.190304,\n \"stddev_ts\": 1.143182,\n \"samples_ns\": [ 2462394748, 2444051895, 2436296362 ],\n \"samples_ts\": [ 207.928, 209.488, 210.155 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:50:19Z\",\n \"avg_ns\": 2804527873,\n \"stddev_ns\": 16877223,\n \"avg_ts\": 45.641581,\n \"stddev_ts\": 0.274259,\n \"samples_ns\": [ 2822650163, 2801673340, 2789260118 ],\n \"samples_ts\": [ 45.3475, 45.687, 45.8903 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:50:10Z", "avg_ns": 2447581001, "stddev_ns": 13402510, "avg_ts": 209.190304, "stddev_ts": 1.143182, "samples_ns": [ 2462394748, 2444051895, 2436296362 ], "samples_ts": [ 207.928, 209.488, 210.155 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:50:19Z", "avg_ns": 2804527873, "stddev_ns": 16877223, "avg_ts": 45.641581, "stddev_ts": 0.274259, "samples_ns": [ 2822650163, 2801673340, 2789260118 ], "samples_ts": [ 45.3475, 45.687, 45.8903 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 70 }, { "timestamp_utc": "2025-12-08T20:51:14.048549+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:50:29Z\",\n \"avg_ns\": 2452497937,\n \"stddev_ns\": 7101791,\n \"avg_ts\": 208.767909,\n \"stddev_ts\": 0.605482,\n \"samples_ns\": [ 2455930571, 2457230909, 2444332333 ],\n \"samples_ts\": [ 208.475, 208.365, 209.464 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:50:38Z\",\n \"avg_ns\": 11707597773,\n \"stddev_ns\": 368851230,\n \"avg_ts\": 43.760725,\n \"stddev_ts\": 1.354067,\n \"samples_ns\": [ 12133506024, 11496328955, 11492958341 ],\n \"samples_ts\": [ 42.1972, 44.536, 44.549 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:50:29Z", "avg_ns": 2452497937, "stddev_ns": 7101791, "avg_ts": 208.767909, "stddev_ts": 0.605482, "samples_ns": [ 2455930571, 2457230909, 2444332333 ], "samples_ts": [ 208.475, 208.365, 209.464 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_type": "gemma3 270M Q4_K - Medium", "model_size": 246587904, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:50:38Z", "avg_ns": 11707597773, "stddev_ns": 368851230, "avg_ts": 43.760725, "stddev_ts": 1.354067, "samples_ns": [ 12133506024, 11496328955, 11492958341 ], "samples_ts": [ 42.1972, 44.536, 44.549 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 71 }, { "timestamp_utc": "2025-12-08T20:51:34.878168+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:51:15Z\",\n \"avg_ns\": 949565670,\n \"stddev_ns\": 5868505,\n \"avg_ts\": 134.801913,\n \"stddev_ts\": 0.835478,\n \"samples_ns\": [ 954118291, 951635933, 942942788 ],\n \"samples_ts\": [ 134.155, 134.505, 135.745 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:51:19Z\",\n \"avg_ns\": 5047061830,\n \"stddev_ns\": 40144183,\n \"avg_ts\": 25.362360,\n \"stddev_ts\": 0.201712,\n \"samples_ns\": [ 5046637398, 5087416547, 5007131545 ],\n \"samples_ts\": [ 25.3634, 25.1601, 25.5635 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:51:15Z", "avg_ns": 949565670, "stddev_ns": 5868505, "avg_ts": 134.801913, "stddev_ts": 0.835478, "samples_ns": [ 954118291, 951635933, 942942788 ], "samples_ts": [ 134.155, 134.505, 135.745 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:51:19Z", "avg_ns": 5047061830, "stddev_ns": 40144183, "avg_ts": 25.36236, "stddev_ts": 0.201712, "samples_ns": [ 5046637398, 5087416547, 5007131545 ], "samples_ts": [ 25.3634, 25.1601, 25.5635 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 72 }, { "timestamp_utc": "2025-12-08T20:52:42.698345+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:51:35Z\",\n \"avg_ns\": 937518237,\n \"stddev_ns\": 1706502,\n \"avg_ts\": 136.530979,\n \"stddev_ts\": 0.248665,\n \"samples_ns\": [ 938784147, 935578233, 938192333 ],\n \"samples_ts\": [ 136.347, 136.814, 136.433 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:51:39Z\",\n \"avg_ns\": 21121412060,\n \"stddev_ns\": 129317472,\n \"avg_ts\": 24.241407,\n \"stddev_ts\": 0.147959,\n \"samples_ns\": [ 21026688986, 21268741908, 21068805286 ],\n \"samples_ts\": [ 24.35, 24.0729, 24.3013 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:51:35Z", "avg_ns": 937518237, "stddev_ns": 1706502, "avg_ts": 136.530979, "stddev_ts": 0.248665, "samples_ns": [ 938784147, 935578233, 938192333 ], "samples_ts": [ 136.347, 136.814, 136.433 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:51:39Z", "avg_ns": 21121412060, "stddev_ns": 129317472, "avg_ts": 24.241407, "stddev_ts": 0.147959, "samples_ns": [ 21026688986, 21268741908, 21068805286 ], "samples_ts": [ 24.35, 24.0729, 24.3013 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 73 }, { "timestamp_utc": "2025-12-08T20:53:16.578670+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:52:43Z\",\n \"avg_ns\": 4036786721,\n \"stddev_ns\": 335581168,\n \"avg_ts\": 127.393847,\n \"stddev_ts\": 10.110099,\n \"samples_ns\": [ 3858162475, 4423898701, 3828298987 ],\n \"samples_ts\": [ 132.706, 115.735, 133.741 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:52:59Z\",\n \"avg_ns\": 5741269968,\n \"stddev_ns\": 367475333,\n \"avg_ts\": 22.353722,\n \"stddev_ts\": 1.382865,\n \"samples_ns\": [ 5571589069, 6162925523, 5489295312 ],\n \"samples_ts\": [ 22.9737, 20.7694, 23.3181 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:52:43Z", "avg_ns": 4036786721, "stddev_ns": 335581168, "avg_ts": 127.393847, "stddev_ts": 10.110099, "samples_ns": [ 3858162475, 4423898701, 3828298987 ], "samples_ts": [ 132.706, 115.735, 133.741 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:52:59Z", "avg_ns": 5741269968, "stddev_ns": 367475333, "avg_ts": 22.353722, "stddev_ts": 1.382865, "samples_ns": [ 5571589069, 6162925523, 5489295312 ], "samples_ts": [ 22.9737, 20.7694, 23.3181 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 74 }, { "timestamp_utc": "2025-12-08T20:54:35.512318+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:53:17Z\",\n \"avg_ns\": 4049112748,\n \"stddev_ns\": 311689692,\n \"avg_ts\": 126.928165,\n \"stddev_ts\": 9.367831,\n \"samples_ns\": [ 3844862506, 4407873493, 3894602245 ],\n \"samples_ts\": [ 133.165, 116.156, 131.464 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:53:33Z\",\n \"avg_ns\": 20754675990,\n \"stddev_ns\": 361156303,\n \"avg_ts\": 24.674071,\n \"stddev_ts\": 0.425117,\n \"samples_ns\": [ 20532475533, 20560155455, 21171396984 ],\n \"samples_ts\": [ 24.9361, 24.9025, 24.1836 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:53:17Z", "avg_ns": 4049112748, "stddev_ns": 311689692, "avg_ts": 126.928165, "stddev_ts": 9.367831, "samples_ns": [ 3844862506, 4407873493, 3894602245 ], "samples_ts": [ 133.165, 116.156, 131.464 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:53:33Z", "avg_ns": 20754675990, "stddev_ns": 361156303, "avg_ts": 24.674071, "stddev_ts": 0.425117, "samples_ns": [ 20532475533, 20560155455, 21171396984 ], "samples_ts": [ 24.9361, 24.9025, 24.1836 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 75 }, { "timestamp_utc": "2025-12-08T20:54:55.645747+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:54:36Z\",\n \"avg_ns\": 939074584,\n \"stddev_ns\": 3509001,\n \"avg_ts\": 136.305669,\n \"stddev_ts\": 0.508604,\n \"samples_ns\": [ 942956682, 938138094, 936128977 ],\n \"samples_ts\": [ 135.743, 136.44, 136.733 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:54:39Z\",\n \"avg_ns\": 5233488826,\n \"stddev_ns\": 341664885,\n \"avg_ts\": 24.525074,\n \"stddev_ts\": 1.544101,\n \"samples_ns\": [ 5058492235, 5627201162, 5014773081 ],\n \"samples_ts\": [ 25.304, 22.7467, 25.5246 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:54:36Z", "avg_ns": 939074584, "stddev_ns": 3509001, "avg_ts": 136.305669, "stddev_ts": 0.508604, "samples_ns": [ 942956682, 938138094, 936128977 ], "samples_ts": [ 135.743, 136.44, 136.733 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:54:39Z", "avg_ns": 5233488826, "stddev_ns": 341664885, "avg_ts": 24.525074, "stddev_ts": 1.544101, "samples_ns": [ 5058492235, 5627201162, 5014773081 ], "samples_ts": [ 25.304, 22.7467, 25.5246 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 76 }, { "timestamp_utc": "2025-12-08T20:56:04.393979+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:54:56Z\",\n \"avg_ns\": 936073843,\n \"stddev_ns\": 5708455,\n \"avg_ts\": 136.744739,\n \"stddev_ts\": 0.833977,\n \"samples_ns\": [ 941763028, 936112190, 930346311 ],\n \"samples_ts\": [ 135.915, 136.736, 137.583 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:54:59Z\",\n \"avg_ns\": 21439203263,\n \"stddev_ns\": 494033660,\n \"avg_ts\": 23.889841,\n \"stddev_ts\": 0.543987,\n \"samples_ns\": [ 22003508402, 21084669626, 21229431762 ],\n \"samples_ts\": [ 23.269, 24.283, 24.1175 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:54:56Z", "avg_ns": 936073843, "stddev_ns": 5708455, "avg_ts": 136.744739, "stddev_ts": 0.833977, "samples_ns": [ 941763028, 936112190, 930346311 ], "samples_ts": [ 135.915, 136.736, 137.583 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:54:59Z", "avg_ns": 21439203263, "stddev_ns": 494033660, "avg_ts": 23.889841, "stddev_ts": 0.543987, "samples_ns": [ 22003508402, 21084669626, 21229431762 ], "samples_ts": [ 23.269, 24.283, 24.1175 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 77 }, { "timestamp_utc": "2025-12-08T20:56:37.828201+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:56:04Z\",\n \"avg_ns\": 4074758095,\n \"stddev_ns\": 339892967,\n \"avg_ts\": 126.210186,\n \"stddev_ts\": 10.044241,\n \"samples_ns\": [ 3881809517, 3875250358, 4467214411 ],\n \"samples_ts\": [ 131.897, 132.12, 114.613 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:56:21Z\",\n \"avg_ns\": 5541686228,\n \"stddev_ns\": 42997197,\n \"avg_ts\": 23.098586,\n \"stddev_ts\": 0.178420,\n \"samples_ns\": [ 5591333682, 5516544355, 5517180648 ],\n \"samples_ts\": [ 22.8926, 23.2029, 23.2003 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:56:04Z", "avg_ns": 4074758095, "stddev_ns": 339892967, "avg_ts": 126.210186, "stddev_ts": 10.044241, "samples_ns": [ 3881809517, 3875250358, 4467214411 ], "samples_ts": [ 131.897, 132.12, 114.613 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:56:21Z", "avg_ns": 5541686228, "stddev_ns": 42997197, "avg_ts": 23.098586, "stddev_ts": 0.17842, "samples_ns": [ 5591333682, 5516544355, 5517180648 ], "samples_ts": [ 22.8926, 23.2029, 23.2003 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 78 }, { "timestamp_utc": "2025-12-08T20:57:58.055698+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:56:38Z\",\n \"avg_ns\": 3871608686,\n \"stddev_ns\": 5961714,\n \"avg_ts\": 132.244979,\n \"stddev_ts\": 0.203734,\n \"samples_ns\": [ 3865007280, 3876597694, 3873221086 ],\n \"samples_ts\": [ 132.471, 132.075, 132.19 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:56:53Z\",\n \"avg_ns\": 21349013157,\n \"stddev_ns\": 229589628,\n \"avg_ts\": 23.984216,\n \"stddev_ts\": 0.257024,\n \"samples_ns\": [ 21600644895, 21295463855, 21150930723 ],\n \"samples_ts\": [ 23.703, 24.0427, 24.207 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:56:38Z", "avg_ns": 3871608686, "stddev_ns": 5961714, "avg_ts": 132.244979, "stddev_ts": 0.203734, "samples_ns": [ 3865007280, 3876597694, 3873221086 ], "samples_ts": [ 132.471, 132.075, 132.19 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:56:53Z", "avg_ns": 21349013157, "stddev_ns": 229589628, "avg_ts": 23.984216, "stddev_ts": 0.257024, "samples_ns": [ 21600644895, 21295463855, 21150930723 ], "samples_ts": [ 23.703, 24.0427, 24.207 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 79 }, { "timestamp_utc": "2025-12-08T20:58:17.709744+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:57:58Z\",\n \"avg_ns\": 940164746,\n \"stddev_ns\": 3938494,\n \"avg_ts\": 136.147941,\n \"stddev_ts\": 0.569051,\n \"samples_ns\": [ 938408358, 937410156, 944675725 ],\n \"samples_ts\": [ 136.401, 136.546, 135.496 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:58:02Z\",\n \"avg_ns\": 5074993825,\n \"stddev_ns\": 24614643,\n \"avg_ts\": 25.222102,\n \"stddev_ts\": 0.122511,\n \"samples_ns\": [ 5096616138, 5080158502, 5048206837 ],\n \"samples_ts\": [ 25.1147, 25.1961, 25.3555 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:57:58Z", "avg_ns": 940164746, "stddev_ns": 3938494, "avg_ts": 136.147941, "stddev_ts": 0.569051, "samples_ns": [ 938408358, 937410156, 944675725 ], "samples_ts": [ 136.401, 136.546, 135.496 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:58:02Z", "avg_ns": 5074993825, "stddev_ns": 24614643, "avg_ts": 25.222102, "stddev_ts": 0.122511, "samples_ns": [ 5096616138, 5080158502, 5048206837 ], "samples_ts": [ 25.1147, 25.1961, 25.3555 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 80 }, { "timestamp_utc": "2025-12-08T20:59:26.060461+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:58:18Z\",\n \"avg_ns\": 944505276,\n \"stddev_ns\": 9249198,\n \"avg_ts\": 135.529301,\n \"stddev_ts\": 1.320020,\n \"samples_ns\": [ 940046922, 955139213, 938329693 ],\n \"samples_ts\": [ 136.163, 134.012, 136.413 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:58:22Z\",\n \"avg_ns\": 21302864218,\n \"stddev_ns\": 705189542,\n \"avg_ts\": 24.051845,\n \"stddev_ts\": 0.793965,\n \"samples_ns\": [ 22030644393, 20622676323, 21255271938 ],\n \"samples_ts\": [ 23.2404, 24.827, 24.0881 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:58:18Z", "avg_ns": 944505276, "stddev_ns": 9249198, "avg_ts": 135.529301, "stddev_ts": 1.32002, "samples_ns": [ 940046922, 955139213, 938329693 ], "samples_ts": [ 136.163, 134.012, 136.413 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T20:58:22Z", "avg_ns": 21302864218, "stddev_ns": 705189542, "avg_ts": 24.051845, "stddev_ts": 0.793965, "samples_ns": [ 22030644393, 20622676323, 21255271938 ], "samples_ts": [ 23.2404, 24.827, 24.0881 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 81 }, { "timestamp_utc": "2025-12-08T20:59:58.618944+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:59:26Z\",\n \"avg_ns\": 4064517968,\n \"stddev_ns\": 2450842,\n \"avg_ts\": 125.968228,\n \"stddev_ts\": 0.075980,\n \"samples_ns\": [ 4066302184, 4065528243, 4061723477 ],\n \"samples_ts\": [ 125.913, 125.937, 126.055 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:59:43Z\",\n \"avg_ns\": 5002784315,\n \"stddev_ns\": 11921176,\n \"avg_ts\": 25.585849,\n \"stddev_ts\": 0.061016,\n \"samples_ns\": [ 4989745428, 5005482107, 5013125410 ],\n \"samples_ts\": [ 25.6526, 25.572, 25.533 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:59:26Z", "avg_ns": 4064517968, "stddev_ns": 2450842, "avg_ts": 125.968228, "stddev_ts": 0.07598, "samples_ns": [ 4066302184, 4065528243, 4061723477 ], "samples_ts": [ 125.913, 125.937, 126.055 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T20:59:43Z", "avg_ns": 5002784315, "stddev_ns": 11921176, "avg_ts": 25.585849, "stddev_ts": 0.061016, "samples_ns": [ 4989745428, 5005482107, 5013125410 ], "samples_ts": [ 25.6526, 25.572, 25.533 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 82 }, { "timestamp_utc": "2025-12-08T21:01:18.655894+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:59:59Z\",\n \"avg_ns\": 4105830083,\n \"stddev_ns\": 10545991,\n \"avg_ts\": 124.701276,\n \"stddev_ts\": 0.320294,\n \"samples_ns\": [ 4116355510, 4095264421, 4105870320 ],\n \"samples_ts\": [ 124.382, 125.022, 124.7 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:00:16Z\",\n \"avg_ns\": 20796082220,\n \"stddev_ns\": 346257369,\n \"avg_ts\": 24.624546,\n \"stddev_ts\": 0.407613,\n \"samples_ns\": [ 21177904428, 20502443237, 20707898995 ],\n \"samples_ts\": [ 24.1761, 24.9726, 24.7249 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T20:59:59Z", "avg_ns": 4105830083, "stddev_ns": 10545991, "avg_ts": 124.701276, "stddev_ts": 0.320294, "samples_ns": [ 4116355510, 4095264421, 4105870320 ], "samples_ts": [ 124.382, 125.022, 124.7 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:00:16Z", "avg_ns": 20796082220, "stddev_ns": 346257369, "avg_ts": 24.624546, "stddev_ts": 0.407613, "samples_ns": [ 21177904428, 20502443237, 20707898995 ], "samples_ts": [ 24.1761, 24.9726, 24.7249 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 83 }, { "timestamp_utc": "2025-12-08T21:01:38.242093+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:01:19Z\",\n \"avg_ns\": 941368996,\n \"stddev_ns\": 3741737,\n \"avg_ts\": 135.973621,\n \"stddev_ts\": 0.541690,\n \"samples_ns\": [ 943552561, 943505788, 937048640 ],\n \"samples_ts\": [ 135.658, 135.664, 136.599 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:01:23Z\",\n \"avg_ns\": 5048955937,\n \"stddev_ns\": 63278551,\n \"avg_ts\": 25.354435,\n \"stddev_ts\": 0.318203,\n \"samples_ns\": [ 5109933112, 5053331560, 4983603139 ],\n \"samples_ts\": [ 25.0493, 25.3298, 25.6842 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:01:19Z", "avg_ns": 941368996, "stddev_ns": 3741737, "avg_ts": 135.973621, "stddev_ts": 0.54169, "samples_ns": [ 943552561, 943505788, 937048640 ], "samples_ts": [ 135.658, 135.664, 136.599 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:01:23Z", "avg_ns": 5048955937, "stddev_ns": 63278551, "avg_ts": 25.354435, "stddev_ts": 0.318203, "samples_ns": [ 5109933112, 5053331560, 4983603139 ], "samples_ts": [ 25.0493, 25.3298, 25.6842 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 84 }, { "timestamp_utc": "2025-12-08T21:02:45.516903+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:01:38Z\",\n \"avg_ns\": 941688542,\n \"stddev_ns\": 4311774,\n \"avg_ts\": 135.927947,\n \"stddev_ts\": 0.622828,\n \"samples_ns\": [ 942152454, 945749493, 937163680 ],\n \"samples_ts\": [ 135.859, 135.342, 136.582 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:01:42Z\",\n \"avg_ns\": 20943797023,\n \"stddev_ns\": 383286896,\n \"avg_ts\": 24.451896,\n \"stddev_ts\": 0.452157,\n \"samples_ns\": [ 20502372439, 21192205836, 21136812794 ],\n \"samples_ts\": [ 24.9727, 24.1598, 24.2231 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:01:38Z", "avg_ns": 941688542, "stddev_ns": 4311774, "avg_ts": 135.927947, "stddev_ts": 0.622828, "samples_ns": [ 942152454, 945749493, 937163680 ], "samples_ts": [ 135.859, 135.342, 136.582 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:01:42Z", "avg_ns": 20943797023, "stddev_ns": 383286896, "avg_ts": 24.451896, "stddev_ts": 0.452157, "samples_ns": [ 20502372439, 21192205836, 21136812794 ], "samples_ts": [ 24.9727, 24.1598, 24.2231 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 85 }, { "timestamp_utc": "2025-12-08T21:03:17.946959+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:02:46Z\",\n \"avg_ns\": 4043552787,\n \"stddev_ns\": 319627608,\n \"avg_ts\": 127.128031,\n \"stddev_ts\": 9.615667,\n \"samples_ns\": [ 3875010560, 3843470342, 4412177459 ],\n \"samples_ts\": [ 132.129, 133.213, 116.042 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:03:02Z\",\n \"avg_ns\": 5070984869,\n \"stddev_ns\": 32983189,\n \"avg_ts\": 25.242355,\n \"stddev_ts\": 0.163792,\n \"samples_ns\": [ 5062250023, 5107456148, 5043248438 ],\n \"samples_ts\": [ 25.2852, 25.0614, 25.3805 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:02:46Z", "avg_ns": 4043552787, "stddev_ns": 319627608, "avg_ts": 127.128031, "stddev_ts": 9.615667, "samples_ns": [ 3875010560, 3843470342, 4412177459 ], "samples_ts": [ 132.129, 133.213, 116.042 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:03:02Z", "avg_ns": 5070984869, "stddev_ns": 32983189, "avg_ts": 25.242355, "stddev_ts": 0.163792, "samples_ns": [ 5062250023, 5107456148, 5043248438 ], "samples_ts": [ 25.2852, 25.0614, 25.3805 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 86 }, { "timestamp_utc": "2025-12-08T21:04:37.197747+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:03:18Z\",\n \"avg_ns\": 3865011001,\n \"stddev_ns\": 9189626,\n \"avg_ts\": 132.471015,\n \"stddev_ts\": 0.315365,\n \"samples_ns\": [ 3869129173, 3871421028, 3854482803 ],\n \"samples_ts\": [ 132.33, 132.251, 132.832 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:03:34Z\",\n \"avg_ns\": 21027781313,\n \"stddev_ns\": 336272958,\n \"avg_ts\": 24.352931,\n \"stddev_ts\": 0.393038,\n \"samples_ns\": [ 21204950147, 20639968115, 21238425679 ],\n \"samples_ts\": [ 24.1453, 24.8062, 24.1072 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:03:18Z", "avg_ns": 3865011001, "stddev_ns": 9189626, "avg_ts": 132.471015, "stddev_ts": 0.315365, "samples_ns": [ 3869129173, 3871421028, 3854482803 ], "samples_ts": [ 132.33, 132.251, 132.832 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:03:34Z", "avg_ns": 21027781313, "stddev_ns": 336272958, "avg_ts": 24.352931, "stddev_ts": 0.393038, "samples_ns": [ 21204950147, 20639968115, 21238425679 ], "samples_ts": [ 24.1453, 24.8062, 24.1072 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 87 }, { "timestamp_utc": "2025-12-08T21:04:57.375930+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:04:37Z\",\n \"avg_ns\": 936090748,\n \"stddev_ns\": 4501283,\n \"avg_ts\": 136.740982,\n \"stddev_ts\": 0.655789,\n \"samples_ns\": [ 941264032, 933939308, 933068904 ],\n \"samples_ts\": [ 135.987, 137.054, 137.182 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:04:41Z\",\n \"avg_ns\": 5246605566,\n \"stddev_ns\": 349709578,\n \"avg_ts\": 24.466518,\n \"stddev_ts\": 1.570614,\n \"samples_ns\": [ 5650247711, 5054861368, 5034707621 ],\n \"samples_ts\": [ 22.6539, 25.3222, 25.4235 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:04:37Z", "avg_ns": 936090748, "stddev_ns": 4501283, "avg_ts": 136.740982, "stddev_ts": 0.655789, "samples_ns": [ 941264032, 933939308, 933068904 ], "samples_ts": [ 135.987, 137.054, 137.182 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:04:41Z", "avg_ns": 5246605566, "stddev_ns": 349709578, "avg_ts": 24.466518, "stddev_ts": 1.570614, "samples_ns": [ 5650247711, 5054861368, 5034707621 ], "samples_ts": [ 22.6539, 25.3222, 25.4235 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 88 }, { "timestamp_utc": "2025-12-08T21:06:04.462174+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:04:57Z\",\n \"avg_ns\": 946384167,\n \"stddev_ns\": 2137943,\n \"avg_ts\": 135.252089,\n \"stddev_ts\": 0.305471,\n \"samples_ns\": [ 946232973, 944325834, 948593694 ],\n \"samples_ts\": [ 135.273, 135.546, 134.937 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:05:01Z\",\n \"avg_ns\": 20870020017,\n \"stddev_ns\": 715888907,\n \"avg_ts\": 24.551686,\n \"stddev_ts\": 0.825912,\n \"samples_ns\": [ 20481345472, 21696176853, 20432537728 ],\n \"samples_ts\": [ 24.9984, 23.5986, 25.0581 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:04:57Z", "avg_ns": 946384167, "stddev_ns": 2137943, "avg_ts": 135.252089, "stddev_ts": 0.305471, "samples_ns": [ 946232973, 944325834, 948593694 ], "samples_ts": [ 135.273, 135.546, 134.937 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:05:01Z", "avg_ns": 20870020017, "stddev_ns": 715888907, "avg_ts": 24.551686, "stddev_ts": 0.825912, "samples_ns": [ 20481345472, 21696176853, 20432537728 ], "samples_ts": [ 24.9984, 23.5986, 25.0581 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 89 }, { "timestamp_utc": "2025-12-08T21:06:37.896953+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:06:05Z\",\n \"avg_ns\": 3871085409,\n \"stddev_ns\": 11083354,\n \"avg_ts\": 132.263368,\n \"stddev_ts\": 0.378409,\n \"samples_ns\": [ 3869181966, 3861077224, 3882997038 ],\n \"samples_ts\": [ 132.328, 132.605, 131.857 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:06:20Z\",\n \"avg_ns\": 5748698143,\n \"stddev_ns\": 367049222,\n \"avg_ts\": 22.324430,\n \"stddev_ts\": 1.374777,\n \"samples_ns\": [ 6172479594, 5542472015, 5531142821 ],\n \"samples_ts\": [ 20.7372, 23.0944, 23.1417 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:06:05Z", "avg_ns": 3871085409, "stddev_ns": 11083354, "avg_ts": 132.263368, "stddev_ts": 0.378409, "samples_ns": [ 3869181966, 3861077224, 3882997038 ], "samples_ts": [ 132.328, 132.605, 131.857 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:06:20Z", "avg_ns": 5748698143, "stddev_ns": 367049222, "avg_ts": 22.32443, "stddev_ts": 1.374777, "samples_ns": [ 6172479594, 5542472015, 5531142821 ], "samples_ts": [ 20.7372, 23.0944, 23.1417 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 90 }, { "timestamp_utc": "2025-12-08T21:07:57.875608+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:06:38Z\",\n \"avg_ns\": 3868803540,\n \"stddev_ns\": 11260841,\n \"avg_ts\": 132.341405,\n \"stddev_ts\": 0.385722,\n \"samples_ns\": [ 3877557749, 3872752865, 3856100006 ],\n \"samples_ts\": [ 132.042, 132.206, 132.777 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:06:53Z\",\n \"avg_ns\": 21275817771,\n \"stddev_ns\": 278419372,\n \"avg_ts\": 24.067613,\n \"stddev_ts\": 0.313295,\n \"samples_ns\": [ 21586322641, 21048403226, 21192727447 ],\n \"samples_ts\": [ 23.7187, 24.3249, 24.1592 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:06:38Z", "avg_ns": 3868803540, "stddev_ns": 11260841, "avg_ts": 132.341405, "stddev_ts": 0.385722, "samples_ns": [ 3877557749, 3872752865, 3856100006 ], "samples_ts": [ 132.042, 132.206, 132.777 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:06:53Z", "avg_ns": 21275817771, "stddev_ns": 278419372, "avg_ts": 24.067613, "stddev_ts": 0.313295, "samples_ns": [ 21586322641, 21048403226, 21192727447 ], "samples_ts": [ 23.7187, 24.3249, 24.1592 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 91 }, { "timestamp_utc": "2025-12-08T21:08:18.125189+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:07:58Z\",\n \"avg_ns\": 942008068,\n \"stddev_ns\": 12656871,\n \"avg_ts\": 135.896196,\n \"stddev_ts\": 1.814782,\n \"samples_ns\": [ 937608451, 932138203, 956277552 ],\n \"samples_ts\": [ 136.518, 137.319, 133.852 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:08:02Z\",\n \"avg_ns\": 5275862265,\n \"stddev_ns\": 313203311,\n \"avg_ts\": 24.316705,\n \"stddev_ts\": 1.396502,\n \"samples_ns\": [ 5113453003, 5077220998, 5636912796 ],\n \"samples_ts\": [ 25.032, 25.2106, 22.7075 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:07:58Z", "avg_ns": 942008068, "stddev_ns": 12656871, "avg_ts": 135.896196, "stddev_ts": 1.814782, "samples_ns": [ 937608451, 932138203, 956277552 ], "samples_ts": [ 136.518, 137.319, 133.852 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:08:02Z", "avg_ns": 5275862265, "stddev_ns": 313203311, "avg_ts": 24.316705, "stddev_ts": 1.396502, "samples_ns": [ 5113453003, 5077220998, 5636912796 ], "samples_ts": [ 25.032, 25.2106, 22.7075 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 92 }, { "timestamp_utc": "2025-12-08T21:09:24.647713+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:08:18Z\",\n \"avg_ns\": 944172247,\n \"stddev_ns\": 5933926,\n \"avg_ts\": 135.572059,\n \"stddev_ts\": 0.853413,\n \"samples_ns\": [ 945204680, 937789852, 949522209 ],\n \"samples_ts\": [ 135.42, 136.491, 134.805 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:08:22Z\",\n \"avg_ns\": 20690106190,\n \"stddev_ns\": 66610883,\n \"avg_ts\": 24.746298,\n \"stddev_ts\": 0.079532,\n \"samples_ns\": [ 20660197305, 20643692382, 20766428885 ],\n \"samples_ts\": [ 24.782, 24.8018, 24.6552 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:08:18Z", "avg_ns": 944172247, "stddev_ns": 5933926, "avg_ts": 135.572059, "stddev_ts": 0.853413, "samples_ns": [ 945204680, 937789852, 949522209 ], "samples_ts": [ 135.42, 136.491, 134.805 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:08:22Z", "avg_ns": 20690106190, "stddev_ns": 66610883, "avg_ts": 24.746298, "stddev_ts": 0.079532, "samples_ns": [ 20660197305, 20643692382, 20766428885 ], "samples_ts": [ 24.782, 24.8018, 24.6552 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 93 }, { "timestamp_utc": "2025-12-08T21:09:57.632271+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:09:25Z\",\n \"avg_ns\": 4095923212,\n \"stddev_ns\": 11655644,\n \"avg_ts\": 125.003017,\n \"stddev_ts\": 0.355290,\n \"samples_ns\": [ 4086441099, 4108935484, 4092393055 ],\n \"samples_ts\": [ 125.292, 124.606, 125.11 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:09:41Z\",\n \"avg_ns\": 5290620650,\n \"stddev_ns\": 316509173,\n \"avg_ts\": 24.249703,\n \"stddev_ts\": 1.402729,\n \"samples_ns\": [ 5094394722, 5121713773, 5655753455 ],\n \"samples_ts\": [ 25.1257, 24.9916, 22.6318 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:09:25Z", "avg_ns": 4095923212, "stddev_ns": 11655644, "avg_ts": 125.003017, "stddev_ts": 0.35529, "samples_ns": [ 4086441099, 4108935484, 4092393055 ], "samples_ts": [ 125.292, 124.606, 125.11 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:09:41Z", "avg_ns": 5290620650, "stddev_ns": 316509173, "avg_ts": 24.249703, "stddev_ts": 1.402729, "samples_ns": [ 5094394722, 5121713773, 5655753455 ], "samples_ts": [ 25.1257, 24.9916, 22.6318 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 94 }, { "timestamp_utc": "2025-12-08T21:11:21.162684+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:09:58Z\",\n \"avg_ns\": 4089939466,\n \"stddev_ns\": 22315123,\n \"avg_ts\": 125.187708,\n \"stddev_ts\": 0.682186,\n \"samples_ns\": [ 4113801043, 4086430644, 4069586712 ],\n \"samples_ts\": [ 124.459, 125.293, 125.811 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:10:14Z\",\n \"avg_ns\": 22149645947,\n \"stddev_ns\": 737208094,\n \"avg_ts\": 23.132650,\n \"stddev_ts\": 0.773357,\n \"samples_ns\": [ 21385110462, 22207738011, 22856089369 ],\n \"samples_ts\": [ 23.9419, 23.055, 22.401 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:09:58Z", "avg_ns": 4089939466, "stddev_ns": 22315123, "avg_ts": 125.187708, "stddev_ts": 0.682186, "samples_ns": [ 4113801043, 4086430644, 4069586712 ], "samples_ts": [ 124.459, 125.293, 125.811 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:10:14Z", "avg_ns": 22149645947, "stddev_ns": 737208094, "avg_ts": 23.13265, "stddev_ts": 0.773357, "samples_ns": [ 21385110462, 22207738011, 22856089369 ], "samples_ts": [ 23.9419, 23.055, 22.401 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 95 }, { "timestamp_utc": "2025-12-08T21:11:41.196300+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:11:21Z\",\n \"avg_ns\": 1127860336,\n \"stddev_ns\": 333812771,\n \"avg_ts\": 119.446765,\n \"stddev_ts\": 30.193282,\n \"samples_ns\": [ 1513314088, 935272071, 934994850 ],\n \"samples_ts\": [ 84.5826, 136.859, 136.899 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:11:26Z\",\n \"avg_ns\": 5011635034,\n \"stddev_ns\": 34538143,\n \"avg_ts\": 25.541373,\n \"stddev_ts\": 0.175351,\n \"samples_ns\": [ 4988540942, 5051340214, 4995023946 ],\n \"samples_ts\": [ 25.6588, 25.3398, 25.6255 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:11:21Z", "avg_ns": 1127860336, "stddev_ns": 333812771, "avg_ts": 119.446765, "stddev_ts": 30.193282, "samples_ns": [ 1513314088, 935272071, 934994850 ], "samples_ts": [ 84.5826, 136.859, 136.899 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:11:26Z", "avg_ns": 5011635034, "stddev_ns": 34538143, "avg_ts": 25.541373, "stddev_ts": 0.175351, "samples_ns": [ 4988540942, 5051340214, 4995023946 ], "samples_ts": [ 25.6588, 25.3398, 25.6255 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 96 }, { "timestamp_utc": "2025-12-08T21:12:49.204924+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:11:41Z\",\n \"avg_ns\": 948124724,\n \"stddev_ns\": 10886835,\n \"avg_ts\": 135.015132,\n \"stddev_ts\": 1.541057,\n \"samples_ns\": [ 943483482, 940327765, 960562927 ],\n \"samples_ts\": [ 135.667, 136.123, 133.255 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:11:45Z\",\n \"avg_ns\": 21175747637,\n \"stddev_ns\": 514009500,\n \"avg_ts\": 24.188043,\n \"stddev_ts\": 0.583366,\n \"samples_ns\": [ 20717684515, 21077916265, 21731642132 ],\n \"samples_ts\": [ 24.7132, 24.2908, 23.5601 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:11:41Z", "avg_ns": 948124724, "stddev_ns": 10886835, "avg_ts": 135.015132, "stddev_ts": 1.541057, "samples_ns": [ 943483482, 940327765, 960562927 ], "samples_ts": [ 135.667, 136.123, 133.255 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:11:45Z", "avg_ns": 21175747637, "stddev_ns": 514009500, "avg_ts": 24.188043, "stddev_ts": 0.583366, "samples_ns": [ 20717684515, 21077916265, 21731642132 ], "samples_ts": [ 24.7132, 24.2908, 23.5601 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 97 }, { "timestamp_utc": "2025-12-08T21:13:21.535865+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:12:49Z\",\n \"avg_ns\": 4047638621,\n \"stddev_ns\": 338719105,\n \"avg_ts\": 127.059758,\n \"stddev_ts\": 10.150296,\n \"samples_ns\": [ 3833472438, 3871295782, 4438147645 ],\n \"samples_ts\": [ 133.56, 132.255, 115.363 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:13:06Z\",\n \"avg_ns\": 5025835505,\n \"stddev_ns\": 20051361,\n \"avg_ts\": 25.468673,\n \"stddev_ts\": 0.101798,\n \"samples_ns\": [ 5041393520, 5032906017, 5003206980 ],\n \"samples_ts\": [ 25.3898, 25.4326, 25.5836 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:12:49Z", "avg_ns": 4047638621, "stddev_ns": 338719105, "avg_ts": 127.059758, "stddev_ts": 10.150296, "samples_ns": [ 3833472438, 3871295782, 4438147645 ], "samples_ts": [ 133.56, 132.255, 115.363 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:13:06Z", "avg_ns": 5025835505, "stddev_ns": 20051361, "avg_ts": 25.468673, "stddev_ts": 0.101798, "samples_ns": [ 5041393520, 5032906017, 5003206980 ], "samples_ts": [ 25.3898, 25.4326, 25.5836 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 98 }, { "timestamp_utc": "2025-12-08T21:14:41.646190+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:13:22Z\",\n \"avg_ns\": 3851442657,\n \"stddev_ns\": 6608996,\n \"avg_ts\": 132.937460,\n \"stddev_ts\": 0.228211,\n \"samples_ns\": [ 3857285461, 3844270683, 3852771829 ],\n \"samples_ts\": [ 132.736, 133.185, 132.891 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:13:37Z\",\n \"avg_ns\": 21336159460,\n \"stddev_ns\": 618771945,\n \"avg_ts\": 24.010245,\n \"stddev_ts\": 0.694402,\n \"samples_ns\": [ 21976377850, 20741333992, 21290766538 ],\n \"samples_ts\": [ 23.2977, 24.685, 24.048 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:13:22Z", "avg_ns": 3851442657, "stddev_ns": 6608996, "avg_ts": 132.93746, "stddev_ts": 0.228211, "samples_ns": [ 3857285461, 3844270683, 3852771829 ], "samples_ts": [ 132.736, 133.185, 132.891 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:13:37Z", "avg_ns": 21336159460, "stddev_ns": 618771945, "avg_ts": 24.010245, "stddev_ts": 0.694402, "samples_ns": [ 21976377850, 20741333992, 21290766538 ], "samples_ts": [ 23.2977, 24.685, 24.048 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 99 }, { "timestamp_utc": "2025-12-08T21:15:01.134167+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:14:42Z\",\n \"avg_ns\": 943684958,\n \"stddev_ns\": 7370455,\n \"avg_ts\": 135.644025,\n \"stddev_ts\": 1.063571,\n \"samples_ns\": [ 946595248, 949155875, 935303752 ],\n \"samples_ts\": [ 135.221, 134.857, 136.854 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:14:46Z\",\n \"avg_ns\": 5010757896,\n \"stddev_ns\": 28678575,\n \"avg_ts\": 25.545594,\n \"stddev_ts\": 0.145738,\n \"samples_ns\": [ 4992101166, 4996392370, 5043780153 ],\n \"samples_ts\": [ 25.6405, 25.6185, 25.3778 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:14:42Z", "avg_ns": 943684958, "stddev_ns": 7370455, "avg_ts": 135.644025, "stddev_ts": 1.063571, "samples_ns": [ 946595248, 949155875, 935303752 ], "samples_ts": [ 135.221, 134.857, 136.854 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:14:46Z", "avg_ns": 5010757896, "stddev_ns": 28678575, "avg_ts": 25.545594, "stddev_ts": 0.145738, "samples_ns": [ 4992101166, 4996392370, 5043780153 ], "samples_ts": [ 25.6405, 25.6185, 25.3778 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 100 }, { "timestamp_utc": "2025-12-08T21:16:08.638051+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:15:01Z\",\n \"avg_ns\": 936345960,\n \"stddev_ns\": 3958392,\n \"avg_ts\": 136.703236,\n \"stddev_ts\": 0.577027,\n \"samples_ns\": [ 940710249, 932987944, 935339688 ],\n \"samples_ts\": [ 136.067, 137.194, 136.849 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:15:05Z\",\n \"avg_ns\": 21015437618,\n \"stddev_ns\": 369781356,\n \"avg_ts\": 24.368098,\n \"stddev_ts\": 0.431000,\n \"samples_ns\": [ 21342830399, 21089120009, 20614362448 ],\n \"samples_ts\": [ 23.9893, 24.2779, 24.8371 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:15:01Z", "avg_ns": 936345960, "stddev_ns": 3958392, "avg_ts": 136.703236, "stddev_ts": 0.577027, "samples_ns": [ 940710249, 932987944, 935339688 ], "samples_ts": [ 136.067, 137.194, 136.849 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:15:05Z", "avg_ns": 21015437618, "stddev_ns": 369781356, "avg_ts": 24.368098, "stddev_ts": 0.431, "samples_ns": [ 21342830399, 21089120009, 20614362448 ], "samples_ts": [ 23.9893, 24.2779, 24.8371 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 101 }, { "timestamp_utc": "2025-12-08T21:16:42.097298+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:16:09Z\",\n \"avg_ns\": 3889577984,\n \"stddev_ns\": 17593588,\n \"avg_ts\": 131.635609,\n \"stddev_ts\": 0.594031,\n \"samples_ns\": [ 3876874648, 3882200002, 3909659302 ],\n \"samples_ts\": [ 132.065, 131.884, 130.958 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:16:24Z\",\n \"avg_ns\": 5742551792,\n \"stddev_ns\": 350472508,\n \"avg_ts\": 22.343358,\n \"stddev_ts\": 1.317771,\n \"samples_ns\": [ 6146743734, 5523059420, 5557852222 ],\n \"samples_ts\": [ 20.824, 23.1756, 23.0305 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:16:09Z", "avg_ns": 3889577984, "stddev_ns": 17593588, "avg_ts": 131.635609, "stddev_ts": 0.594031, "samples_ns": [ 3876874648, 3882200002, 3909659302 ], "samples_ts": [ 132.065, 131.884, 130.958 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:16:24Z", "avg_ns": 5742551792, "stddev_ns": 350472508, "avg_ts": 22.343358, "stddev_ts": 1.317771, "samples_ns": [ 6146743734, 5523059420, 5557852222 ], "samples_ts": [ 20.824, 23.1756, 23.0305 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 102 }, { "timestamp_utc": "2025-12-08T21:18:04.953899+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:16:42Z\",\n \"avg_ns\": 3818857957,\n \"stddev_ns\": 9597907,\n \"avg_ts\": 134.072060,\n \"stddev_ts\": 0.336674,\n \"samples_ns\": [ 3829383192, 3810590247, 3816600433 ],\n \"samples_ts\": [ 133.703, 134.362, 134.151 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:16:57Z\",\n \"avg_ns\": 22293976240,\n \"stddev_ns\": 168665433,\n \"avg_ts\": 22.966725,\n \"stddev_ts\": 0.174483,\n \"samples_ns\": [ 22100198823, 22373961753, 22407768146 ],\n \"samples_ts\": [ 23.1672, 22.8837, 22.8492 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:16:42Z", "avg_ns": 3818857957, "stddev_ns": 9597907, "avg_ts": 134.07206, "stddev_ts": 0.336674, "samples_ns": [ 3829383192, 3810590247, 3816600433 ], "samples_ts": [ 133.703, 134.362, 134.151 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:16:57Z", "avg_ns": 22293976240, "stddev_ns": 168665433, "avg_ts": 22.966725, "stddev_ts": 0.174483, "samples_ns": [ 22100198823, 22373961753, 22407768146 ], "samples_ts": [ 23.1672, 22.8837, 22.8492 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 103 }, { "timestamp_utc": "2025-12-08T21:18:24.400432+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:18:05Z\",\n \"avg_ns\": 938249947,\n \"stddev_ns\": 838822,\n \"avg_ts\": 136.424274,\n \"stddev_ts\": 0.121773,\n \"samples_ns\": [ 937510965, 939159972, 938078906 ],\n \"samples_ts\": [ 136.532, 136.292, 136.449 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:18:09Z\",\n \"avg_ns\": 5000774849,\n \"stddev_ns\": 19593757,\n \"avg_ts\": 25.596296,\n \"stddev_ts\": 0.100351,\n \"samples_ns\": [ 4980231064, 5019254970, 5002838514 ],\n \"samples_ts\": [ 25.7016, 25.5018, 25.5855 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:18:05Z", "avg_ns": 938249947, "stddev_ns": 838822, "avg_ts": 136.424274, "stddev_ts": 0.121773, "samples_ns": [ 937510965, 939159972, 938078906 ], "samples_ts": [ 136.532, 136.292, 136.449 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:18:09Z", "avg_ns": 5000774849, "stddev_ns": 19593757, "avg_ts": 25.596296, "stddev_ts": 0.100351, "samples_ns": [ 4980231064, 5019254970, 5002838514 ], "samples_ts": [ 25.7016, 25.5018, 25.5855 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 104 }, { "timestamp_utc": "2025-12-08T21:19:32.381981+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:18:24Z\",\n \"avg_ns\": 947025103,\n \"stddev_ns\": 1605094,\n \"avg_ts\": 135.160351,\n \"stddev_ts\": 0.229219,\n \"samples_ns\": [ 948011589, 947890010, 945173712 ],\n \"samples_ts\": [ 135.019, 135.037, 135.425 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:18:28Z\",\n \"avg_ns\": 21176692457,\n \"stddev_ns\": 21323937,\n \"avg_ts\": 24.177541,\n \"stddev_ts\": 0.024350,\n \"samples_ns\": [ 21179521069, 21196460916, 21154095386 ],\n \"samples_ts\": [ 24.1743, 24.155, 24.2034 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:18:24Z", "avg_ns": 947025103, "stddev_ns": 1605094, "avg_ts": 135.160351, "stddev_ts": 0.229219, "samples_ns": [ 948011589, 947890010, 945173712 ], "samples_ts": [ 135.019, 135.037, 135.425 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:18:28Z", "avg_ns": 21176692457, "stddev_ns": 21323937, "avg_ts": 24.177541, "stddev_ts": 0.02435, "samples_ns": [ 21179521069, 21196460916, 21154095386 ], "samples_ts": [ 24.1743, 24.155, 24.2034 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 105 }, { "timestamp_utc": "2025-12-08T21:20:06.009631+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:19:32Z\",\n \"avg_ns\": 4084998986,\n \"stddev_ns\": 18549709,\n \"avg_ts\": 125.338347,\n \"stddev_ts\": 0.567816,\n \"samples_ns\": [ 4077236694, 4106168792, 4071591473 ],\n \"samples_ts\": [ 125.575, 124.69, 125.749 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:19:49Z\",\n \"avg_ns\": 5522688644,\n \"stddev_ns\": 23988943,\n \"avg_ts\": 23.177408,\n \"stddev_ts\": 0.100560,\n \"samples_ns\": [ 5518272305, 5501214841, 5548578787 ],\n \"samples_ts\": [ 23.1957, 23.2676, 23.069 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:19:32Z", "avg_ns": 4084998986, "stddev_ns": 18549709, "avg_ts": 125.338347, "stddev_ts": 0.567816, "samples_ns": [ 4077236694, 4106168792, 4071591473 ], "samples_ts": [ 125.575, 124.69, 125.749 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:19:49Z", "avg_ns": 5522688644, "stddev_ns": 23988943, "avg_ts": 23.177408, "stddev_ts": 0.10056, "samples_ns": [ 5518272305, 5501214841, 5548578787 ], "samples_ts": [ 23.1957, 23.2676, 23.069 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 106 }, { "timestamp_utc": "2025-12-08T21:21:25.360522+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:20:06Z\",\n \"avg_ns\": 4085637975,\n \"stddev_ns\": 18282239,\n \"avg_ts\": 125.318699,\n \"stddev_ts\": 0.560828,\n \"samples_ns\": [ 4067215350, 4103776061, 4085922516 ],\n \"samples_ts\": [ 125.885, 124.763, 125.308 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:20:23Z\",\n \"avg_ns\": 20567248973,\n \"stddev_ns\": 94968301,\n \"avg_ts\": 24.894301,\n \"stddev_ts\": 0.115252,\n \"samples_ns\": [ 20617319843, 20626704125, 20457722951 ],\n \"samples_ts\": [ 24.8335, 24.8222, 25.0272 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:20:06Z", "avg_ns": 4085637975, "stddev_ns": 18282239, "avg_ts": 125.318699, "stddev_ts": 0.560828, "samples_ns": [ 4067215350, 4103776061, 4085922516 ], "samples_ts": [ 125.885, 124.763, 125.308 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:20:23Z", "avg_ns": 20567248973, "stddev_ns": 94968301, "avg_ts": 24.894301, "stddev_ts": 0.115252, "samples_ns": [ 20617319843, 20626704125, 20457722951 ], "samples_ts": [ 24.8335, 24.8222, 25.0272 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 107 }, { "timestamp_utc": "2025-12-08T21:21:37.072210+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:21:25Z\",\n \"avg_ns\": 495314874,\n \"stddev_ns\": 2683575,\n \"avg_ts\": 258.426529,\n \"stddev_ts\": 1.399705,\n \"samples_ns\": [ 492674157, 495231473, 498038994 ],\n \"samples_ts\": [ 259.807, 258.465, 257.008 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:21:28Z\",\n \"avg_ns\": 2836733521,\n \"stddev_ns\": 11784093,\n \"avg_ts\": 45.122839,\n \"stddev_ts\": 0.187208,\n \"samples_ns\": [ 2834258568, 2826383715, 2849558282 ],\n \"samples_ts\": [ 45.1617, 45.2876, 44.9192 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:21:25Z", "avg_ns": 495314874, "stddev_ns": 2683575, "avg_ts": 258.426529, "stddev_ts": 1.399705, "samples_ns": [ 492674157, 495231473, 498038994 ], "samples_ts": [ 259.807, 258.465, 257.008 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:21:28Z", "avg_ns": 2836733521, "stddev_ns": 11784093, "avg_ts": 45.122839, "stddev_ts": 0.187208, "samples_ns": [ 2834258568, 2826383715, 2849558282 ], "samples_ts": [ 45.1617, 45.2876, 44.9192 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 108 }, { "timestamp_utc": "2025-12-08T21:22:15.298814+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:21:37Z\",\n \"avg_ns\": 492395988,\n \"stddev_ns\": 722187,\n \"avg_ts\": 259.953750,\n \"stddev_ts\": 0.381023,\n \"samples_ns\": [ 493206910, 492158926, 491822128 ],\n \"samples_ts\": [ 259.526, 260.079, 260.257 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:21:39Z\",\n \"avg_ns\": 11865142367,\n \"stddev_ns\": 306841704,\n \"avg_ts\": 43.170575,\n \"stddev_ts\": 1.100001,\n \"samples_ns\": [ 12219452271, 11687553161, 11688421670 ],\n \"samples_ts\": [ 41.9004, 43.8073, 43.804 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:21:37Z", "avg_ns": 492395988, "stddev_ns": 722187, "avg_ts": 259.95375, "stddev_ts": 0.381023, "samples_ns": [ 493206910, 492158926, 491822128 ], "samples_ts": [ 259.526, 260.079, 260.257 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:21:39Z", "avg_ns": 11865142367, "stddev_ns": 306841704, "avg_ts": 43.170575, "stddev_ts": 1.100001, "samples_ns": [ 12219452271, 11687553161, 11688421670 ], "samples_ts": [ 41.9004, 43.8073, 43.804 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 109 }, { "timestamp_utc": "2025-12-08T21:22:32.544433+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:22:15Z\",\n \"avg_ns\": 2024431929,\n \"stddev_ns\": 5725351,\n \"avg_ts\": 252.911801,\n \"stddev_ts\": 0.714157,\n \"samples_ns\": [ 2021734931, 2031007693, 2020553163 ],\n \"samples_ts\": [ 253.248, 252.092, 253.396 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:22:23Z\",\n \"avg_ns\": 2833007730,\n \"stddev_ns\": 13046410,\n \"avg_ts\": 45.182303,\n \"stddev_ts\": 0.208560,\n \"samples_ns\": [ 2842546357, 2818140549, 2838336285 ],\n \"samples_ts\": [ 45.03, 45.42, 45.0968 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:22:15Z", "avg_ns": 2024431929, "stddev_ns": 5725351, "avg_ts": 252.911801, "stddev_ts": 0.714157, "samples_ns": [ 2021734931, 2031007693, 2020553163 ], "samples_ts": [ 253.248, 252.092, 253.396 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:22:23Z", "avg_ns": 2833007730, "stddev_ns": 13046410, "avg_ts": 45.182303, "stddev_ts": 0.20856, "samples_ns": [ 2842546357, 2818140549, 2838336285 ], "samples_ts": [ 45.03, 45.42, 45.0968 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 110 }, { "timestamp_utc": "2025-12-08T21:23:17.411338+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:22:33Z\",\n \"avg_ns\": 2011743305,\n \"stddev_ns\": 4059260,\n \"avg_ts\": 254.506322,\n \"stddev_ts\": 0.513481,\n \"samples_ns\": [ 2015877010, 2011590077, 2007762828 ],\n \"samples_ts\": [ 253.984, 254.525, 255.01 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:22:41Z\",\n \"avg_ns\": 11843177768,\n \"stddev_ns\": 405065724,\n \"avg_ts\": 43.264728,\n \"stddev_ts\": 1.451129,\n \"samples_ns\": [ 11603944065, 11614723265, 12310865974 ],\n \"samples_ts\": [ 44.1229, 44.082, 41.5893 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:22:33Z", "avg_ns": 2011743305, "stddev_ns": 4059260, "avg_ts": 254.506322, "stddev_ts": 0.513481, "samples_ns": [ 2015877010, 2011590077, 2007762828 ], "samples_ts": [ 253.984, 254.525, 255.01 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:22:41Z", "avg_ns": 11843177768, "stddev_ns": 405065724, "avg_ts": 43.264728, "stddev_ts": 1.451129, "samples_ns": [ 11603944065, 11614723265, 12310865974 ], "samples_ts": [ 44.1229, 44.082, 41.5893 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 111 }, { "timestamp_utc": "2025-12-08T21:23:28.574779+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:23:17Z\",\n \"avg_ns\": 495076596,\n \"stddev_ns\": 1168462,\n \"avg_ts\": 258.546811,\n \"stddev_ts\": 0.610045,\n \"samples_ns\": [ 496230470, 495104429, 493894891 ],\n \"samples_ts\": [ 257.945, 258.531, 259.164 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:23:19Z\",\n \"avg_ns\": 2841229965,\n \"stddev_ns\": 9909782,\n \"avg_ts\": 45.051277,\n \"stddev_ts\": 0.156953,\n \"samples_ns\": [ 2852074057, 2838971020, 2832644819 ],\n \"samples_ts\": [ 44.8796, 45.0868, 45.1875 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:23:17Z", "avg_ns": 495076596, "stddev_ns": 1168462, "avg_ts": 258.546811, "stddev_ts": 0.610045, "samples_ns": [ 496230470, 495104429, 493894891 ], "samples_ts": [ 257.945, 258.531, 259.164 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:23:19Z", "avg_ns": 2841229965, "stddev_ns": 9909782, "avg_ts": 45.051277, "stddev_ts": 0.156953, "samples_ns": [ 2852074057, 2838971020, 2832644819 ], "samples_ts": [ 44.8796, 45.0868, 45.1875 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 112 }, { "timestamp_utc": "2025-12-08T21:24:06.865143+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:23:29Z\",\n \"avg_ns\": 499323311,\n \"stddev_ns\": 1636162,\n \"avg_ts\": 256.348771,\n \"stddev_ts\": 0.841008,\n \"samples_ns\": [ 499753613, 500701320, 497515000 ],\n \"samples_ts\": [ 256.126, 255.641, 257.279 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:23:31Z\",\n \"avg_ns\": 11874955613,\n \"stddev_ns\": 347376318,\n \"avg_ts\": 43.140156,\n \"stddev_ts\": 1.241160,\n \"samples_ns\": [ 12275768780, 11661062048, 11688036012 ],\n \"samples_ts\": [ 41.7082, 43.9068, 43.8055 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:23:29Z", "avg_ns": 499323311, "stddev_ns": 1636162, "avg_ts": 256.348771, "stddev_ts": 0.841008, "samples_ns": [ 499753613, 500701320, 497515000 ], "samples_ts": [ 256.126, 255.641, 257.279 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:23:31Z", "avg_ns": 11874955613, "stddev_ns": 347376318, "avg_ts": 43.140156, "stddev_ts": 1.24116, "samples_ns": [ 12275768780, 11661062048, 11688036012 ], "samples_ts": [ 41.7082, 43.9068, 43.8055 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 113 }, { "timestamp_utc": "2025-12-08T21:24:24.192435+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:24:07Z\",\n \"avg_ns\": 2023228574,\n \"stddev_ns\": 7603549,\n \"avg_ts\": 253.063257,\n \"stddev_ts\": 0.949493,\n \"samples_ns\": [ 2031759171, 2017164483, 2020762068 ],\n \"samples_ts\": [ 251.998, 253.822, 253.37 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:24:15Z\",\n \"avg_ns\": 2856979782,\n \"stddev_ns\": 8138557,\n \"avg_ts\": 44.802799,\n \"stddev_ts\": 0.127421,\n \"samples_ns\": [ 2866349202, 2852922560, 2851667585 ],\n \"samples_ts\": [ 44.6561, 44.8663, 44.886 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:24:07Z", "avg_ns": 2023228574, "stddev_ns": 7603549, "avg_ts": 253.063257, "stddev_ts": 0.949493, "samples_ns": [ 2031759171, 2017164483, 2020762068 ], "samples_ts": [ 251.998, 253.822, 253.37 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:24:15Z", "avg_ns": 2856979782, "stddev_ns": 8138557, "avg_ts": 44.802799, "stddev_ts": 0.127421, "samples_ns": [ 2866349202, 2852922560, 2851667585 ], "samples_ts": [ 44.6561, 44.8663, 44.886 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 114 }, { "timestamp_utc": "2025-12-08T21:25:10.313975+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:24:24Z\",\n \"avg_ns\": 2011233988,\n \"stddev_ns\": 9929441,\n \"avg_ts\": 254.574215,\n \"stddev_ts\": 1.255992,\n \"samples_ns\": [ 2010343773, 2001779833, 2021578360 ],\n \"samples_ts\": [ 254.683, 255.772, 253.267 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:24:33Z\",\n \"avg_ns\": 12270921518,\n \"stddev_ns\": 485567999,\n \"avg_ts\": 41.768521,\n \"stddev_ts\": 1.663129,\n \"samples_ns\": [ 12732317404, 12316106991, 11764340159 ],\n \"samples_ts\": [ 40.2126, 41.5716, 43.5214 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:24:24Z", "avg_ns": 2011233988, "stddev_ns": 9929441, "avg_ts": 254.574215, "stddev_ts": 1.255992, "samples_ns": [ 2010343773, 2001779833, 2021578360 ], "samples_ts": [ 254.683, 255.772, 253.267 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:24:33Z", "avg_ns": 12270921518, "stddev_ns": 485567999, "avg_ts": 41.768521, "stddev_ts": 1.663129, "samples_ns": [ 12732317404, 12316106991, 11764340159 ], "samples_ts": [ 40.2126, 41.5716, 43.5214 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 115 }, { "timestamp_utc": "2025-12-08T21:25:22.062144+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:25:10Z\",\n \"avg_ns\": 489417064,\n \"stddev_ns\": 2282695,\n \"avg_ts\": 261.539413,\n \"stddev_ts\": 1.216758,\n \"samples_ns\": [ 487817098, 492030963, 488403132 ],\n \"samples_ts\": [ 262.393, 260.146, 262.079 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:25:13Z\",\n \"avg_ns\": 2847802791,\n \"stddev_ns\": 19740330,\n \"avg_ts\": 44.948377,\n \"stddev_ts\": 0.312514,\n \"samples_ns\": [ 2854168567, 2825665001, 2863574805 ],\n \"samples_ts\": [ 44.8467, 45.2991, 44.6994 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:25:10Z", "avg_ns": 489417064, "stddev_ns": 2282695, "avg_ts": 261.539413, "stddev_ts": 1.216758, "samples_ns": [ 487817098, 492030963, 488403132 ], "samples_ts": [ 262.393, 260.146, 262.079 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:25:13Z", "avg_ns": 2847802791, "stddev_ns": 19740330, "avg_ts": 44.948377, "stddev_ts": 0.312514, "samples_ns": [ 2854168567, 2825665001, 2863574805 ], "samples_ts": [ 44.8467, 45.2991, 44.6994 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 116 }, { "timestamp_utc": "2025-12-08T21:26:00.205268+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:25:22Z\",\n \"avg_ns\": 498553847,\n \"stddev_ns\": 2147351,\n \"avg_ts\": 256.745759,\n \"stddev_ts\": 1.107750,\n \"samples_ns\": [ 499200279, 500303604, 496157659 ],\n \"samples_ts\": [ 256.41, 255.845, 257.983 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:25:24Z\",\n \"avg_ns\": 11827195619,\n \"stddev_ns\": 353470695,\n \"avg_ts\": 43.315421,\n \"stddev_ts\": 1.272930,\n \"samples_ns\": [ 11644221682, 12234644510, 11602720665 ],\n \"samples_ts\": [ 43.9703, 41.8484, 44.1276 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:25:22Z", "avg_ns": 498553847, "stddev_ns": 2147351, "avg_ts": 256.745759, "stddev_ts": 1.10775, "samples_ns": [ 499200279, 500303604, 496157659 ], "samples_ts": [ 256.41, 255.845, 257.983 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:25:24Z", "avg_ns": 11827195619, "stddev_ns": 353470695, "avg_ts": 43.315421, "stddev_ts": 1.27293, "samples_ns": [ 11644221682, 12234644510, 11602720665 ], "samples_ts": [ 43.9703, 41.8484, 44.1276 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 117 }, { "timestamp_utc": "2025-12-08T21:26:18.519918+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:26:00Z\",\n \"avg_ns\": 2331374450,\n \"stddev_ns\": 227441165,\n \"avg_ts\": 220.974975,\n \"stddev_ts\": 20.966430,\n \"samples_ns\": [ 2134337510, 2279520676, 2580265166 ],\n \"samples_ts\": [ 239.887, 224.609, 198.429 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:26:09Z\",\n \"avg_ns\": 2829896080,\n \"stddev_ns\": 16287643,\n \"avg_ts\": 45.232345,\n \"stddev_ts\": 0.261125,\n \"samples_ns\": [ 2811287142, 2836840907, 2841560191 ],\n \"samples_ts\": [ 45.5307, 45.1206, 45.0457 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:26:00Z", "avg_ns": 2331374450, "stddev_ns": 227441165, "avg_ts": 220.974975, "stddev_ts": 20.96643, "samples_ns": [ 2134337510, 2279520676, 2580265166 ], "samples_ts": [ 239.887, 224.609, 198.429 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:26:09Z", "avg_ns": 2829896080, "stddev_ns": 16287643, "avg_ts": 45.232345, "stddev_ts": 0.261125, "samples_ns": [ 2811287142, 2836840907, 2841560191 ], "samples_ts": [ 45.5307, 45.1206, 45.0457 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 118 }, { "timestamp_utc": "2025-12-08T21:27:02.646925+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:26:19Z\",\n \"avg_ns\": 2127285773,\n \"stddev_ns\": 5302720,\n \"avg_ts\": 240.683282,\n \"stddev_ts\": 0.600148,\n \"samples_ns\": [ 2121747000, 2132315296, 2127795024 ],\n \"samples_ts\": [ 241.311, 240.115, 240.625 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:26:27Z\",\n \"avg_ns\": 11624166837,\n \"stddev_ns\": 25664581,\n \"avg_ts\": 44.046311,\n \"stddev_ts\": 0.097197,\n \"samples_ns\": [ 11600801179, 11620064167, 11651635167 ],\n \"samples_ts\": [ 44.1349, 44.0617, 43.9423 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:26:19Z", "avg_ns": 2127285773, "stddev_ns": 5302720, "avg_ts": 240.683282, "stddev_ts": 0.600148, "samples_ns": [ 2121747000, 2132315296, 2127795024 ], "samples_ts": [ 241.311, 240.115, 240.625 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:26:27Z", "avg_ns": 11624166837, "stddev_ns": 25664581, "avg_ts": 44.046311, "stddev_ts": 0.097197, "samples_ns": [ 11600801179, 11620064167, 11651635167 ], "samples_ts": [ 44.1349, 44.0617, 43.9423 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 119 }, { "timestamp_utc": "2025-12-08T21:27:14.222713+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:27:03Z\",\n \"avg_ns\": 494979822,\n \"stddev_ns\": 1592343,\n \"avg_ts\": 258.598183,\n \"stddev_ts\": 0.831568,\n \"samples_ns\": [ 496636288, 493460468, 494842710 ],\n \"samples_ts\": [ 257.734, 259.393, 258.668 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:27:05Z\",\n \"avg_ns\": 2827894679,\n \"stddev_ns\": 6249864,\n \"avg_ts\": 45.263502,\n \"stddev_ts\": 0.100076,\n \"samples_ns\": [ 2833731045, 2828652557, 2821300435 ],\n \"samples_ts\": [ 45.1701, 45.2512, 45.3691 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:27:03Z", "avg_ns": 494979822, "stddev_ns": 1592343, "avg_ts": 258.598183, "stddev_ts": 0.831568, "samples_ns": [ 496636288, 493460468, 494842710 ], "samples_ts": [ 257.734, 259.393, 258.668 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:27:05Z", "avg_ns": 2827894679, "stddev_ns": 6249864, "avg_ts": 45.263502, "stddev_ts": 0.100076, "samples_ns": [ 2833731045, 2828652557, 2821300435 ], "samples_ts": [ 45.1701, 45.2512, 45.3691 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 120 }, { "timestamp_utc": "2025-12-08T21:27:52.419349+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:27:14Z\",\n \"avg_ns\": 493099945,\n \"stddev_ns\": 2380801,\n \"avg_ts\": 259.586287,\n \"stddev_ts\": 1.249874,\n \"samples_ns\": [ 491799983, 491652120, 495847732 ],\n \"samples_ts\": [ 260.268, 260.347, 258.144 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:27:16Z\",\n \"avg_ns\": 11850589317,\n \"stddev_ns\": 366217708,\n \"avg_ts\": 43.231649,\n \"stddev_ts\": 1.312823,\n \"samples_ns\": [ 12272962099, 11657188721, 11621617131 ],\n \"samples_ts\": [ 41.7177, 43.9214, 44.0558 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:27:14Z", "avg_ns": 493099945, "stddev_ns": 2380801, "avg_ts": 259.586287, "stddev_ts": 1.249874, "samples_ns": [ 491799983, 491652120, 495847732 ], "samples_ts": [ 260.268, 260.347, 258.144 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:27:16Z", "avg_ns": 11850589317, "stddev_ns": 366217708, "avg_ts": 43.231649, "stddev_ts": 1.312823, "samples_ns": [ 12272962099, 11657188721, 11621617131 ], "samples_ts": [ 41.7177, 43.9214, 44.0558 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 121 }, { "timestamp_utc": "2025-12-08T21:28:09.782173+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:27:52Z\",\n \"avg_ns\": 2023645113,\n \"stddev_ns\": 8566754,\n \"avg_ts\": 253.011815,\n \"stddev_ts\": 1.072119,\n \"samples_ns\": [ 2031471565, 2014492682, 2024971092 ],\n \"samples_ts\": [ 252.034, 254.158, 252.843 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:28:01Z\",\n \"avg_ns\": 2865755346,\n \"stddev_ns\": 16441019,\n \"avg_ts\": 44.666341,\n \"stddev_ts\": 0.255888,\n \"samples_ns\": [ 2883427143, 2850912372, 2862926524 ],\n \"samples_ts\": [ 44.3916, 44.8979, 44.7095 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:27:52Z", "avg_ns": 2023645113, "stddev_ns": 8566754, "avg_ts": 253.011815, "stddev_ts": 1.072119, "samples_ns": [ 2031471565, 2014492682, 2024971092 ], "samples_ts": [ 252.034, 254.158, 252.843 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:28:01Z", "avg_ns": 2865755346, "stddev_ns": 16441019, "avg_ts": 44.666341, "stddev_ts": 0.255888, "samples_ns": [ 2883427143, 2850912372, 2862926524 ], "samples_ts": [ 44.3916, 44.8979, 44.7095 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 122 }, { "timestamp_utc": "2025-12-08T21:28:54.813137+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:28:10Z\",\n \"avg_ns\": 2018620828,\n \"stddev_ns\": 5330147,\n \"avg_ts\": 253.639698,\n \"stddev_ts\": 0.668927,\n \"samples_ns\": [ 2014401317, 2016850475, 2024610693 ],\n \"samples_ts\": [ 254.17, 253.861, 252.888 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:28:18Z\",\n \"avg_ns\": 12091529207,\n \"stddev_ns\": 334689778,\n \"avg_ts\": 42.365678,\n \"stddev_ts\": 1.191399,\n \"samples_ns\": [ 12305000358, 11705795806, 12263791458 ],\n \"samples_ts\": [ 41.6091, 43.739, 41.7489 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:28:10Z", "avg_ns": 2018620828, "stddev_ns": 5330147, "avg_ts": 253.639698, "stddev_ts": 0.668927, "samples_ns": [ 2014401317, 2016850475, 2024610693 ], "samples_ts": [ 254.17, 253.861, 252.888 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:28:18Z", "avg_ns": 12091529207, "stddev_ns": 334689778, "avg_ts": 42.365678, "stddev_ts": 1.191399, "samples_ns": [ 12305000358, 11705795806, 12263791458 ], "samples_ts": [ 41.6091, 43.739, 41.7489 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 123 }, { "timestamp_utc": "2025-12-08T21:29:06.663117+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:28:55Z\",\n \"avg_ns\": 496284865,\n \"stddev_ns\": 3166949,\n \"avg_ts\": 257.923379,\n \"stddev_ts\": 1.643496,\n \"samples_ns\": [ 493387073, 495802266, 499665258 ],\n \"samples_ts\": [ 259.431, 258.167, 256.172 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:28:57Z\",\n \"avg_ns\": 3060779736,\n \"stddev_ns\": 360443712,\n \"avg_ts\": 42.185085,\n \"stddev_ts\": 4.658261,\n \"samples_ns\": [ 2829999083, 2876211919, 3476128206 ],\n \"samples_ts\": [ 45.2297, 44.503, 36.8226 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:28:55Z", "avg_ns": 496284865, "stddev_ns": 3166949, "avg_ts": 257.923379, "stddev_ts": 1.643496, "samples_ns": [ 493387073, 495802266, 499665258 ], "samples_ts": [ 259.431, 258.167, 256.172 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:28:57Z", "avg_ns": 3060779736, "stddev_ns": 360443712, "avg_ts": 42.185085, "stddev_ts": 4.658261, "samples_ns": [ 2829999083, 2876211919, 3476128206 ], "samples_ts": [ 45.2297, 44.503, 36.8226 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 124 }, { "timestamp_utc": "2025-12-08T21:29:44.295240+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:29:07Z\",\n \"avg_ns\": 498362988,\n \"stddev_ns\": 2208588,\n \"avg_ts\": 256.844272,\n \"stddev_ts\": 1.140306,\n \"samples_ns\": [ 500153633, 499040103, 495895229 ],\n \"samples_ts\": [ 255.921, 256.492, 258.119 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:29:09Z\",\n \"avg_ns\": 11648421219,\n \"stddev_ns\": 33200496,\n \"avg_ts\": 43.954693,\n \"stddev_ts\": 0.125442,\n \"samples_ns\": [ 11674422830, 11611023724, 11659817104 ],\n \"samples_ts\": [ 43.8566, 44.096, 43.9115 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:29:07Z", "avg_ns": 498362988, "stddev_ns": 2208588, "avg_ts": 256.844272, "stddev_ts": 1.140306, "samples_ns": [ 500153633, 499040103, 495895229 ], "samples_ts": [ 255.921, 256.492, 258.119 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:29:09Z", "avg_ns": 11648421219, "stddev_ns": 33200496, "avg_ts": 43.954693, "stddev_ts": 0.125442, "samples_ns": [ 11674422830, 11611023724, 11659817104 ], "samples_ts": [ 43.8566, 44.096, 43.9115 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 125 }, { "timestamp_utc": "2025-12-08T21:30:01.521732+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:29:44Z\",\n \"avg_ns\": 2017633656,\n \"stddev_ns\": 12182399,\n \"avg_ts\": 253.768790,\n \"stddev_ts\": 1.533038,\n \"samples_ns\": [ 2018305853, 2005129075, 2029466040 ],\n \"samples_ts\": [ 253.678, 255.345, 252.283 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:29:52Z\",\n \"avg_ns\": 2832730487,\n \"stddev_ns\": 7192275,\n \"avg_ts\": 45.186279,\n \"stddev_ts\": 0.114557,\n \"samples_ns\": [ 2828456250, 2828701232, 2841033980 ],\n \"samples_ts\": [ 45.2544, 45.2504, 45.054 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:29:44Z", "avg_ns": 2017633656, "stddev_ns": 12182399, "avg_ts": 253.76879, "stddev_ts": 1.533038, "samples_ns": [ 2018305853, 2005129075, 2029466040 ], "samples_ts": [ 253.678, 255.345, 252.283 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:29:52Z", "avg_ns": 2832730487, "stddev_ns": 7192275, "avg_ts": 45.186279, "stddev_ts": 0.114557, "samples_ns": [ 2828456250, 2828701232, 2841033980 ], "samples_ts": [ 45.2544, 45.2504, 45.054 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 126 }, { "timestamp_utc": "2025-12-08T21:30:46.365094+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:30:02Z\",\n \"avg_ns\": 2034022711,\n \"stddev_ns\": 2954270,\n \"avg_ts\": 251.718290,\n \"stddev_ts\": 0.365412,\n \"samples_ns\": [ 2033450642, 2037220830, 2031396662 ],\n \"samples_ts\": [ 251.789, 251.323, 252.043 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:30:10Z\",\n \"avg_ns\": 11903711805,\n \"stddev_ns\": 356940656,\n \"avg_ts\": 43.037332,\n \"stddev_ts\": 1.277652,\n \"samples_ns\": [ 12296230581, 11598579142, 11816325692 ],\n \"samples_ts\": [ 41.6388, 44.1433, 43.3299 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:30:02Z", "avg_ns": 2034022711, "stddev_ns": 2954270, "avg_ts": 251.71829, "stddev_ts": 0.365412, "samples_ns": [ 2033450642, 2037220830, 2031396662 ], "samples_ts": [ 251.789, 251.323, 252.043 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:30:10Z", "avg_ns": 11903711805, "stddev_ns": 356940656, "avg_ts": 43.037332, "stddev_ts": 1.277652, "samples_ns": [ 12296230581, 11598579142, 11816325692 ], "samples_ts": [ 41.6388, 44.1433, 43.3299 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 127 }, { "timestamp_utc": "2025-12-08T21:30:57.690603+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:30:47Z\",\n \"avg_ns\": 495146577,\n \"stddev_ns\": 3576260,\n \"avg_ts\": 258.518270,\n \"stddev_ts\": 1.860833,\n \"samples_ns\": [ 492395102, 493855635, 499188995 ],\n \"samples_ts\": [ 259.954, 259.185, 256.416 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:30:49Z\",\n \"avg_ns\": 2846075809,\n \"stddev_ns\": 10977668,\n \"avg_ts\": 44.974651,\n \"stddev_ts\": 0.173091,\n \"samples_ns\": [ 2839225342, 2840264564, 2858737521 ],\n \"samples_ts\": [ 45.0827, 45.0662, 44.775 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:30:47Z", "avg_ns": 495146577, "stddev_ns": 3576260, "avg_ts": 258.51827, "stddev_ts": 1.860833, "samples_ns": [ 492395102, 493855635, 499188995 ], "samples_ts": [ 259.954, 259.185, 256.416 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:30:49Z", "avg_ns": 2846075809, "stddev_ns": 10977668, "avg_ts": 44.974651, "stddev_ts": 0.173091, "samples_ns": [ 2839225342, 2840264564, 2858737521 ], "samples_ts": [ 45.0827, 45.0662, 44.775 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 128 }, { "timestamp_utc": "2025-12-08T21:31:35.396792+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:30:58Z\",\n \"avg_ns\": 497603592,\n \"stddev_ns\": 636374,\n \"avg_ts\": 257.233150,\n \"stddev_ts\": 0.329056,\n \"samples_ns\": [ 496927806, 498191401, 497691569 ],\n \"samples_ts\": [ 257.583, 256.929, 257.187 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:31:00Z\",\n \"avg_ns\": 11676651924,\n \"stddev_ns\": 58743134,\n \"avg_ts\": 43.848926,\n \"stddev_ts\": 0.220775,\n \"samples_ns\": [ 11731972008, 11682984512, 11614999254 ],\n \"samples_ts\": [ 43.6414, 43.8244, 44.0809 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:30:58Z", "avg_ns": 497603592, "stddev_ns": 636374, "avg_ts": 257.23315, "stddev_ts": 0.329056, "samples_ns": [ 496927806, 498191401, 497691569 ], "samples_ts": [ 257.583, 256.929, 257.187 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:31:00Z", "avg_ns": 11676651924, "stddev_ns": 58743134, "avg_ts": 43.848926, "stddev_ts": 0.220775, "samples_ns": [ 11731972008, 11682984512, 11614999254 ], "samples_ts": [ 43.6414, 43.8244, 44.0809 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 129 }, { "timestamp_utc": "2025-12-08T21:31:53.098046+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:31:35Z\",\n \"avg_ns\": 2131013928,\n \"stddev_ns\": 678574,\n \"avg_ts\": 240.261233,\n \"stddev_ts\": 0.076315,\n \"samples_ns\": [ 2130658646, 2130588525, 2131794614 ],\n \"samples_ts\": [ 240.301, 240.309, 240.173 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:31:44Z\",\n \"avg_ns\": 2833632094,\n \"stddev_ns\": 7608090,\n \"avg_ts\": 45.171925,\n \"stddev_ts\": 0.121346,\n \"samples_ns\": [ 2834661406, 2825561937, 2840672940 ],\n \"samples_ts\": [ 45.1553, 45.3007, 45.0597 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:31:35Z", "avg_ns": 2131013928, "stddev_ns": 678574, "avg_ts": 240.261233, "stddev_ts": 0.076315, "samples_ns": [ 2130658646, 2130588525, 2131794614 ], "samples_ts": [ 240.301, 240.309, 240.173 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:31:44Z", "avg_ns": 2833632094, "stddev_ns": 7608090, "avg_ts": 45.171925, "stddev_ts": 0.121346, "samples_ns": [ 2834661406, 2825561937, 2840672940 ], "samples_ts": [ 45.1553, 45.3007, 45.0597 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 130 }, { "timestamp_utc": "2025-12-08T21:32:37.867332+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:31:53Z\",\n \"avg_ns\": 2133009732,\n \"stddev_ns\": 18306409,\n \"avg_ts\": 240.048170,\n \"stddev_ts\": 2.055234,\n \"samples_ns\": [ 2116777012, 2129400198, 2152851987 ],\n \"samples_ts\": [ 241.877, 240.443, 237.824 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:32:02Z\",\n \"avg_ns\": 11853239356,\n \"stddev_ns\": 328626586,\n \"avg_ts\": 43.216760,\n \"stddev_ts\": 1.180384,\n \"samples_ns\": [ 11701479541, 12230320446, 11627918081 ],\n \"samples_ts\": [ 43.7552, 41.8632, 44.032 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:31:53Z", "avg_ns": 2133009732, "stddev_ns": 18306409, "avg_ts": 240.04817, "stddev_ts": 2.055234, "samples_ns": [ 2116777012, 2129400198, 2152851987 ], "samples_ts": [ 241.877, 240.443, 237.824 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:32:02Z", "avg_ns": 11853239356, "stddev_ns": 328626586, "avg_ts": 43.21676, "stddev_ts": 1.180384, "samples_ns": [ 11701479541, 12230320446, 11627918081 ], "samples_ts": [ 43.7552, 41.8632, 44.032 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 131 }, { "timestamp_utc": "2025-12-08T21:32:48.974028+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:32:38Z\",\n \"avg_ns\": 492306084,\n \"stddev_ns\": 227309,\n \"avg_ts\": 260.000886,\n \"stddev_ts\": 0.120078,\n \"samples_ns\": [ 492408910, 492463811, 492045531 ],\n \"samples_ts\": [ 259.947, 259.918, 260.139 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:32:40Z\",\n \"avg_ns\": 2821223358,\n \"stddev_ns\": 19775784,\n \"avg_ts\": 45.371881,\n \"stddev_ts\": 0.319321,\n \"samples_ns\": [ 2798413426, 2831699930, 2833556718 ],\n \"samples_ts\": [ 45.7402, 45.2025, 45.1729 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:32:38Z", "avg_ns": 492306084, "stddev_ns": 227309, "avg_ts": 260.000886, "stddev_ts": 0.120078, "samples_ns": [ 492408910, 492463811, 492045531 ], "samples_ts": [ 259.947, 259.918, 260.139 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:32:40Z", "avg_ns": 2821223358, "stddev_ns": 19775784, "avg_ts": 45.371881, "stddev_ts": 0.319321, "samples_ns": [ 2798413426, 2831699930, 2833556718 ], "samples_ts": [ 45.7402, 45.2025, 45.1729 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 132 }, { "timestamp_utc": "2025-12-08T21:33:26.742172+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:32:49Z\",\n \"avg_ns\": 492659190,\n \"stddev_ns\": 2567802,\n \"avg_ts\": 259.819216,\n \"stddev_ts\": 1.358140,\n \"samples_ns\": [ 493996268, 494282331, 489698973 ],\n \"samples_ts\": [ 259.111, 258.961, 261.385 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:32:51Z\",\n \"avg_ns\": 11708273903,\n \"stddev_ns\": 6953655,\n \"avg_ts\": 43.729770,\n \"stddev_ts\": 0.025968,\n \"samples_ns\": [ 11700935313, 11709125626, 11714760772 ],\n \"samples_ts\": [ 43.7572, 43.7266, 43.7055 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:32:49Z", "avg_ns": 492659190, "stddev_ns": 2567802, "avg_ts": 259.819216, "stddev_ts": 1.35814, "samples_ns": [ 493996268, 494282331, 489698973 ], "samples_ts": [ 259.111, 258.961, 261.385 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:32:51Z", "avg_ns": 11708273903, "stddev_ns": 6953655, "avg_ts": 43.72977, "stddev_ts": 0.025968, "samples_ns": [ 11700935313, 11709125626, 11714760772 ], "samples_ts": [ 43.7572, 43.7266, 43.7055 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 133 }, { "timestamp_utc": "2025-12-08T21:33:44.627268+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:33:27Z\",\n \"avg_ns\": 2213495698,\n \"stddev_ns\": 341297486,\n \"avg_ts\": 234.724549,\n \"stddev_ts\": 33.234012,\n \"samples_ns\": [ 2014591218, 2607586235, 2018309643 ],\n \"samples_ts\": [ 254.146, 196.35, 253.678 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:33:36Z\",\n \"avg_ns\": 2847341717,\n \"stddev_ns\": 12362359,\n \"avg_ts\": 44.954776,\n \"stddev_ts\": 0.195163,\n \"samples_ns\": [ 2847179094, 2859784356, 2835061703 ],\n \"samples_ts\": [ 44.9568, 44.7586, 45.1489 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:33:27Z", "avg_ns": 2213495698, "stddev_ns": 341297486, "avg_ts": 234.724549, "stddev_ts": 33.234012, "samples_ns": [ 2014591218, 2607586235, 2018309643 ], "samples_ts": [ 254.146, 196.35, 253.678 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:33:36Z", "avg_ns": 2847341717, "stddev_ns": 12362359, "avg_ts": 44.954776, "stddev_ts": 0.195163, "samples_ns": [ 2847179094, 2859784356, 2835061703 ], "samples_ts": [ 44.9568, 44.7586, 45.1489 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 134 }, { "timestamp_utc": "2025-12-08T21:34:29.084742+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:33:45Z\",\n \"avg_ns\": 2019766989,\n \"stddev_ns\": 2737778,\n \"avg_ts\": 253.494898,\n \"stddev_ts\": 0.343475,\n \"samples_ns\": [ 2017347527, 2019214595, 2022738845 ],\n \"samples_ts\": [ 253.799, 253.564, 253.122 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:33:53Z\",\n \"avg_ns\": 11899458025,\n \"stddev_ns\": 310071070,\n \"avg_ts\": 43.046367,\n \"stddev_ts\": 1.105087,\n \"samples_ns\": [ 11714541883, 11726400448, 12257431746 ],\n \"samples_ts\": [ 43.7064, 43.6622, 41.7706 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:33:45Z", "avg_ns": 2019766989, "stddev_ns": 2737778, "avg_ts": 253.494898, "stddev_ts": 0.343475, "samples_ns": [ 2017347527, 2019214595, 2022738845 ], "samples_ts": [ 253.799, 253.564, 253.122 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:33:53Z", "avg_ns": 11899458025, "stddev_ns": 310071070, "avg_ts": 43.046367, "stddev_ts": 1.105087, "samples_ns": [ 11714541883, 11726400448, 12257431746 ], "samples_ts": [ 43.7064, 43.6622, 41.7706 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 135 }, { "timestamp_utc": "2025-12-08T21:34:40.277357+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:34:29Z\",\n \"avg_ns\": 497164625,\n \"stddev_ns\": 1179444,\n \"avg_ts\": 257.460958,\n \"stddev_ts\": 0.611487,\n \"samples_ns\": [ 495807783, 497743920, 497942173 ],\n \"samples_ts\": [ 258.165, 257.16, 257.058 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:34:31Z\",\n \"avg_ns\": 2845901419,\n \"stddev_ns\": 6293717,\n \"avg_ts\": 44.977109,\n \"stddev_ts\": 0.099451,\n \"samples_ns\": [ 2852344717, 2839768789, 2845590751 ],\n \"samples_ts\": [ 44.8754, 45.0741, 44.9819 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:34:29Z", "avg_ns": 497164625, "stddev_ns": 1179444, "avg_ts": 257.460958, "stddev_ts": 0.611487, "samples_ns": [ 495807783, 497743920, 497942173 ], "samples_ts": [ 258.165, 257.16, 257.058 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:34:31Z", "avg_ns": 2845901419, "stddev_ns": 6293717, "avg_ts": 44.977109, "stddev_ts": 0.099451, "samples_ns": [ 2852344717, 2839768789, 2845590751 ], "samples_ts": [ 44.8754, 45.0741, 44.9819 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 136 }, { "timestamp_utc": "2025-12-08T21:35:18.475128+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:34:40Z\",\n \"avg_ns\": 494383981,\n \"stddev_ns\": 2048120,\n \"avg_ts\": 258.911033,\n \"stddev_ts\": 1.074638,\n \"samples_ns\": [ 492063104, 495152051, 495936790 ],\n \"samples_ts\": [ 260.129, 258.506, 258.097 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:34:42Z\",\n \"avg_ns\": 11851095680,\n \"stddev_ns\": 335900617,\n \"avg_ts\": 43.225535,\n \"stddev_ts\": 1.205519,\n \"samples_ns\": [ 12238779371, 11667512004, 11646995666 ],\n \"samples_ts\": [ 41.8342, 43.8825, 43.9598 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:34:40Z", "avg_ns": 494383981, "stddev_ns": 2048120, "avg_ts": 258.911033, "stddev_ts": 1.074638, "samples_ns": [ 492063104, 495152051, 495936790 ], "samples_ts": [ 260.129, 258.506, 258.097 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:34:42Z", "avg_ns": 11851095680, "stddev_ns": 335900617, "avg_ts": 43.225535, "stddev_ts": 1.205519, "samples_ns": [ 12238779371, 11667512004, 11646995666 ], "samples_ts": [ 41.8342, 43.8825, 43.9598 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 137 }, { "timestamp_utc": "2025-12-08T21:35:36.388266+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:35:19Z\",\n \"avg_ns\": 2218036957,\n \"stddev_ns\": 324995986,\n \"avg_ts\": 233.922858,\n \"stddev_ts\": 31.614752,\n \"samples_ns\": [ 2040189460, 2593142620, 2020778793 ],\n \"samples_ts\": [ 250.957, 197.444, 253.368 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:35:27Z\",\n \"avg_ns\": 2850094443,\n \"stddev_ns\": 7246402,\n \"avg_ts\": 44.910986,\n \"stddev_ts\": 0.114348,\n \"samples_ns\": [ 2854398894, 2841728650, 2854155787 ],\n \"samples_ts\": [ 44.8431, 45.043, 44.8469 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:35:19Z", "avg_ns": 2218036957, "stddev_ns": 324995986, "avg_ts": 233.922858, "stddev_ts": 31.614752, "samples_ns": [ 2040189460, 2593142620, 2020778793 ], "samples_ts": [ 250.957, 197.444, 253.368 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:35:27Z", "avg_ns": 2850094443, "stddev_ns": 7246402, "avg_ts": 44.910986, "stddev_ts": 0.114348, "samples_ns": [ 2854398894, 2841728650, 2854155787 ], "samples_ts": [ 44.8431, 45.043, 44.8469 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 138 }, { "timestamp_utc": "2025-12-08T21:36:20.109917+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:35:36Z\",\n \"avg_ns\": 2013341637,\n \"stddev_ns\": 10303135,\n \"avg_ts\": 254.308037,\n \"stddev_ts\": 1.304526,\n \"samples_ns\": [ 2021303957, 2001705041, 2017015913 ],\n \"samples_ts\": [ 253.302, 255.782, 253.84 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:35:45Z\",\n \"avg_ns\": 11670047527,\n \"stddev_ns\": 48594018,\n \"avg_ts\": 43.873506,\n \"stddev_ts\": 0.182257,\n \"samples_ns\": [ 11639168138, 11726060790, 11644913654 ],\n \"samples_ts\": [ 43.9894, 43.6634, 43.9677 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:35:36Z", "avg_ns": 2013341637, "stddev_ns": 10303135, "avg_ts": 254.308037, "stddev_ts": 1.304526, "samples_ns": [ 2021303957, 2001705041, 2017015913 ], "samples_ts": [ 253.302, 255.782, 253.84 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:35:45Z", "avg_ns": 11670047527, "stddev_ns": 48594018, "avg_ts": 43.873506, "stddev_ts": 0.182257, "samples_ns": [ 11639168138, 11726060790, 11644913654 ], "samples_ts": [ 43.9894, 43.6634, 43.9677 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 139 }, { "timestamp_utc": "2025-12-08T21:36:31.212696+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:36:20Z\",\n \"avg_ns\": 493058298,\n \"stddev_ns\": 1957911,\n \"avg_ts\": 259.606915,\n \"stddev_ts\": 1.029410,\n \"samples_ns\": [ 491403877, 495219895, 492551122 ],\n \"samples_ts\": [ 260.478, 258.471, 259.872 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:36:22Z\",\n \"avg_ns\": 2821436253,\n \"stddev_ns\": 14894183,\n \"avg_ts\": 45.367807,\n \"stddev_ts\": 0.239134,\n \"samples_ns\": [ 2808238428, 2818485458, 2837584874 ],\n \"samples_ts\": [ 45.5802, 45.4145, 45.1088 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:36:20Z", "avg_ns": 493058298, "stddev_ns": 1957911, "avg_ts": 259.606915, "stddev_ts": 1.02941, "samples_ns": [ 491403877, 495219895, 492551122 ], "samples_ts": [ 260.478, 258.471, 259.872 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:36:22Z", "avg_ns": 2821436253, "stddev_ns": 14894183, "avg_ts": 45.367807, "stddev_ts": 0.239134, "samples_ns": [ 2808238428, 2818485458, 2837584874 ], "samples_ts": [ 45.5802, 45.4145, 45.1088 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 140 }, { "timestamp_utc": "2025-12-08T21:37:09.300701+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:36:31Z\",\n \"avg_ns\": 498579047,\n \"stddev_ns\": 4805668,\n \"avg_ts\": 256.745539,\n \"stddev_ts\": 2.480379,\n \"samples_ns\": [ 493432938, 502949774, 499354431 ],\n \"samples_ts\": [ 259.407, 254.499, 256.331 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:36:33Z\",\n \"avg_ns\": 11809875581,\n \"stddev_ns\": 379385205,\n \"avg_ts\": 43.382852,\n \"stddev_ts\": 1.368343,\n \"samples_ns\": [ 11581540177, 11600268161, 12247818406 ],\n \"samples_ts\": [ 44.2083, 44.1369, 41.8034 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:36:31Z", "avg_ns": 498579047, "stddev_ns": 4805668, "avg_ts": 256.745539, "stddev_ts": 2.480379, "samples_ns": [ 493432938, 502949774, 499354431 ], "samples_ts": [ 259.407, 254.499, 256.331 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:36:33Z", "avg_ns": 11809875581, "stddev_ns": 379385205, "avg_ts": 43.382852, "stddev_ts": 1.368343, "samples_ns": [ 11581540177, 11600268161, 12247818406 ], "samples_ts": [ 44.2083, 44.1369, 41.8034 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 141 }, { "timestamp_utc": "2025-12-08T21:37:28.372808+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:37:09Z\",\n \"avg_ns\": 2322246253,\n \"stddev_ns\": 246198916,\n \"avg_ts\": 222.076480,\n \"stddev_ts\": 22.666421,\n \"samples_ns\": [ 2251509452, 2596070384, 2119158923 ],\n \"samples_ts\": [ 227.403, 197.221, 241.605 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:37:18Z\",\n \"avg_ns\": 3099381827,\n \"stddev_ns\": 7733273,\n \"avg_ts\": 41.298729,\n \"stddev_ts\": 0.103008,\n \"samples_ns\": [ 3107416910, 3098737031, 3091991542 ],\n \"samples_ts\": [ 41.1918, 41.3072, 41.3973 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:37:09Z", "avg_ns": 2322246253, "stddev_ns": 246198916, "avg_ts": 222.07648, "stddev_ts": 22.666421, "samples_ns": [ 2251509452, 2596070384, 2119158923 ], "samples_ts": [ 227.403, 197.221, 241.605 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:37:18Z", "avg_ns": 3099381827, "stddev_ns": 7733273, "avg_ts": 41.298729, "stddev_ts": 0.103008, "samples_ns": [ 3107416910, 3098737031, 3091991542 ], "samples_ts": [ 41.1918, 41.3072, 41.3973 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 142 }, { "timestamp_utc": "2025-12-08T21:38:15.889526+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:37:28Z\",\n \"avg_ns\": 2326568199,\n \"stddev_ns\": 354342168,\n \"avg_ts\": 223.239924,\n \"stddev_ts\": 31.253433,\n \"samples_ns\": [ 2118466032, 2125531618, 2735706949 ],\n \"samples_ts\": [ 241.684, 240.881, 187.155 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:37:38Z\",\n \"avg_ns\": 12581414999,\n \"stddev_ns\": 347347046,\n \"avg_ts\": 40.715313,\n \"stddev_ts\": 1.106597,\n \"samples_ns\": [ 12365380154, 12396777926, 12982086919 ],\n \"samples_ts\": [ 41.4059, 41.3011, 39.439 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:37:28Z", "avg_ns": 2326568199, "stddev_ns": 354342168, "avg_ts": 223.239924, "stddev_ts": 31.253433, "samples_ns": [ 2118466032, 2125531618, 2735706949 ], "samples_ts": [ 241.684, 240.881, 187.155 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_type": "gemma3 270M Q8_0", "model_size": 285018624, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:37:38Z", "avg_ns": 12581414999, "stddev_ns": 347347046, "avg_ts": 40.715313, "stddev_ts": 1.106597, "samples_ns": [ 12365380154, 12396777926, 12982086919 ], "samples_ts": [ 41.4059, 41.3011, 39.439 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 143 }, { "timestamp_utc": "2025-12-08T21:38:35.528304+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:38:17Z\",\n \"avg_ns\": 987494991,\n \"stddev_ns\": 1777260,\n \"avg_ts\": 129.621190,\n \"stddev_ts\": 0.233136,\n \"samples_ns\": [ 989457186, 985993320, 987034467 ],\n \"samples_ts\": [ 129.364, 129.818, 129.681 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:38:21Z\",\n \"avg_ns\": 4723068809,\n \"stddev_ns\": 399310868,\n \"avg_ts\": 27.224723,\n \"stddev_ts\": 2.194691,\n \"samples_ns\": [ 5184108066, 4498140943, 4486957419 ],\n \"samples_ts\": [ 24.6908, 28.4562, 28.5271 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:38:17Z", "avg_ns": 987494991, "stddev_ns": 1777260, "avg_ts": 129.62119, "stddev_ts": 0.233136, "samples_ns": [ 989457186, 985993320, 987034467 ], "samples_ts": [ 129.364, 129.818, 129.681 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:38:21Z", "avg_ns": 4723068809, "stddev_ns": 399310868, "avg_ts": 27.224723, "stddev_ts": 2.194691, "samples_ns": [ 5184108066, 4498140943, 4486957419 ], "samples_ts": [ 24.6908, 28.4562, 28.5271 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 144 }, { "timestamp_utc": "2025-12-08T21:39:35.378911+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:38:36Z\",\n \"avg_ns\": 990718959,\n \"stddev_ns\": 985425,\n \"avg_ts\": 129.199187,\n \"stddev_ts\": 0.128308,\n \"samples_ns\": [ 990047924, 991849062, 990259893 ],\n \"samples_ts\": [ 129.287, 129.052, 129.259 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:38:40Z\",\n \"avg_ns\": 18386325318,\n \"stddev_ns\": 57390748,\n \"avg_ts\": 27.846963,\n \"stddev_ts\": 0.086991,\n \"samples_ns\": [ 18324516767, 18437929239, 18396529948 ],\n \"samples_ts\": [ 27.9407, 27.7688, 27.8313 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:38:36Z", "avg_ns": 990718959, "stddev_ns": 985425, "avg_ts": 129.199187, "stddev_ts": 0.128308, "samples_ns": [ 990047924, 991849062, 990259893 ], "samples_ts": [ 129.287, 129.052, 129.259 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:38:40Z", "avg_ns": 18386325318, "stddev_ns": 57390748, "avg_ts": 27.846963, "stddev_ts": 0.086991, "samples_ns": [ 18324516767, 18437929239, 18396529948 ], "samples_ts": [ 27.9407, 27.7688, 27.8313 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 145 }, { "timestamp_utc": "2025-12-08T21:40:06.020941+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:39:35Z\",\n \"avg_ns\": 4058196063,\n \"stddev_ns\": 5440965,\n \"avg_ts\": 126.164583,\n \"stddev_ts\": 0.169278,\n \"samples_ns\": [ 4051948251, 4060747480, 4061892458 ],\n \"samples_ts\": [ 126.359, 126.085, 126.05 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:39:52Z\",\n \"avg_ns\": 4561687431,\n \"stddev_ns\": 22600214,\n \"avg_ts\": 28.060252,\n \"stddev_ts\": 0.139226,\n \"samples_ns\": [ 4566364498, 4537114714, 4581583082 ],\n \"samples_ts\": [ 28.0311, 28.2118, 27.9379 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:39:35Z", "avg_ns": 4058196063, "stddev_ns": 5440965, "avg_ts": 126.164583, "stddev_ts": 0.169278, "samples_ns": [ 4051948251, 4060747480, 4061892458 ], "samples_ts": [ 126.359, 126.085, 126.05 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:39:52Z", "avg_ns": 4561687431, "stddev_ns": 22600214, "avg_ts": 28.060252, "stddev_ts": 0.139226, "samples_ns": [ 4566364498, 4537114714, 4581583082 ], "samples_ts": [ 28.0311, 28.2118, 27.9379 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 146 }, { "timestamp_utc": "2025-12-08T21:41:23.434478+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:40:06Z\",\n \"avg_ns\": 4210017791,\n \"stddev_ns\": 143844573,\n \"avg_ts\": 121.710130,\n \"stddev_ts\": 4.190766,\n \"samples_ns\": [ 4230652019, 4342430943, 4056970412 ],\n \"samples_ts\": [ 121.022, 117.906, 126.203 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:40:23Z\",\n \"avg_ns\": 20002618365,\n \"stddev_ns\": 31471970,\n \"avg_ts\": 25.596691,\n \"stddev_ts\": 0.040247,\n \"samples_ns\": [ 19976950205, 19993174114, 20037730777 ],\n \"samples_ts\": [ 25.6295, 25.6087, 25.5518 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:40:06Z", "avg_ns": 4210017791, "stddev_ns": 143844573, "avg_ts": 121.71013, "stddev_ts": 4.190766, "samples_ns": [ 4230652019, 4342430943, 4056970412 ], "samples_ts": [ 121.022, 117.906, 126.203 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:40:23Z", "avg_ns": 20002618365, "stddev_ns": 31471970, "avg_ts": 25.596691, "stddev_ts": 0.040247, "samples_ns": [ 19976950205, 19993174114, 20037730777 ], "samples_ts": [ 25.6295, 25.6087, 25.5518 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 147 }, { "timestamp_utc": "2025-12-08T21:41:41.688138+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:41:24Z\",\n \"avg_ns\": 991722721,\n \"stddev_ns\": 2856165,\n \"avg_ts\": 129.069049,\n \"stddev_ts\": 0.372164,\n \"samples_ns\": [ 993954209, 988504349, 992709607 ],\n \"samples_ts\": [ 128.779, 129.489, 128.94 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:41:28Z\",\n \"avg_ns\": 4528113471,\n \"stddev_ns\": 43694907,\n \"avg_ts\": 28.269588,\n \"stddev_ts\": 0.271301,\n \"samples_ns\": [ 4578495791, 4505258296, 4500586327 ],\n \"samples_ts\": [ 27.9568, 28.4112, 28.4407 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:41:24Z", "avg_ns": 991722721, "stddev_ns": 2856165, "avg_ts": 129.069049, "stddev_ts": 0.372164, "samples_ns": [ 993954209, 988504349, 992709607 ], "samples_ts": [ 128.779, 129.489, 128.94 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:41:28Z", "avg_ns": 4528113471, "stddev_ns": 43694907, "avg_ts": 28.269588, "stddev_ts": 0.271301, "samples_ns": [ 4578495791, 4505258296, 4500586327 ], "samples_ts": [ 27.9568, 28.4112, 28.4407 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 148 }, { "timestamp_utc": "2025-12-08T21:42:42.224977+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:41:42Z\",\n \"avg_ns\": 989668256,\n \"stddev_ns\": 132773,\n \"avg_ts\": 129.336271,\n \"stddev_ts\": 0.017353,\n \"samples_ns\": [ 989732589, 989756608, 989515571 ],\n \"samples_ts\": [ 129.328, 129.325, 129.356 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:41:46Z\",\n \"avg_ns\": 18628611507,\n \"stddev_ns\": 452043514,\n \"avg_ts\": 27.495248,\n \"stddev_ts\": 0.658013,\n \"samples_ns\": [ 19150408308, 18355905791, 18379520422 ],\n \"samples_ts\": [ 26.7357, 27.8929, 27.8571 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:41:42Z", "avg_ns": 989668256, "stddev_ns": 132773, "avg_ts": 129.336271, "stddev_ts": 0.017353, "samples_ns": [ 989732589, 989756608, 989515571 ], "samples_ts": [ 129.328, 129.325, 129.356 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:41:46Z", "avg_ns": 18628611507, "stddev_ns": 452043514, "avg_ts": 27.495248, "stddev_ts": 0.658013, "samples_ns": [ 19150408308, 18355905791, 18379520422 ], "samples_ts": [ 26.7357, 27.8929, 27.8571 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 149 }, { "timestamp_utc": "2025-12-08T21:43:13.580068+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:42:42Z\",\n \"avg_ns\": 4086252867,\n \"stddev_ns\": 625248,\n \"avg_ts\": 125.298170,\n \"stddev_ts\": 0.019072,\n \"samples_ns\": [ 4086306283, 4086846409, 4085605910 ],\n \"samples_ts\": [ 125.297, 125.28, 125.318 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:42:59Z\",\n \"avg_ns\": 4764047482,\n \"stddev_ns\": 395998215,\n \"avg_ts\": 26.986580,\n \"stddev_ts\": 2.141528,\n \"samples_ns\": [ 5220828403, 4517547072, 4553766973 ],\n \"samples_ts\": [ 24.5172, 28.334, 28.1086 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:42:42Z", "avg_ns": 4086252867, "stddev_ns": 625248, "avg_ts": 125.29817, "stddev_ts": 0.019072, "samples_ns": [ 4086306283, 4086846409, 4085605910 ], "samples_ts": [ 125.297, 125.28, 125.318 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:42:59Z", "avg_ns": 4764047482, "stddev_ns": 395998215, "avg_ts": 26.98658, "stddev_ts": 2.141528, "samples_ns": [ 5220828403, 4517547072, 4553766973 ], "samples_ts": [ 24.5172, 28.334, 28.1086 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 150 }, { "timestamp_utc": "2025-12-08T21:44:26.934350+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:43:14Z\",\n \"avg_ns\": 4217878132,\n \"stddev_ns\": 254522450,\n \"avg_ts\": 121.673477,\n \"stddev_ts\": 7.095048,\n \"samples_ns\": [ 4071504411, 4511774576, 4070355411 ],\n \"samples_ts\": [ 125.752, 113.481, 125.788 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:43:30Z\",\n \"avg_ns\": 18640692053,\n \"stddev_ns\": 457554472,\n \"avg_ts\": 27.477681,\n \"stddev_ts\": 0.665463,\n \"samples_ns\": [ 19166469045, 18422803477, 18332803638 ],\n \"samples_ts\": [ 26.7133, 27.7916, 27.9281 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:43:14Z", "avg_ns": 4217878132, "stddev_ns": 254522450, "avg_ts": 121.673477, "stddev_ts": 7.095048, "samples_ns": [ 4071504411, 4511774576, 4070355411 ], "samples_ts": [ 125.752, 113.481, 125.788 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:43:30Z", "avg_ns": 18640692053, "stddev_ns": 457554472, "avg_ts": 27.477681, "stddev_ts": 0.665463, "samples_ns": [ 19166469045, 18422803477, 18332803638 ], "samples_ts": [ 26.7133, 27.7916, 27.9281 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 151 }, { "timestamp_utc": "2025-12-08T21:44:45.149899+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:44:27Z\",\n \"avg_ns\": 992457546,\n \"stddev_ns\": 165154,\n \"avg_ts\": 128.972774,\n \"stddev_ts\": 0.021463,\n \"samples_ns\": [ 992480818, 992281990, 992609830 ],\n \"samples_ts\": [ 128.97, 128.996, 128.953 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:44:31Z\",\n \"avg_ns\": 4520743498,\n \"stddev_ns\": 49380672,\n \"avg_ts\": 28.316174,\n \"stddev_ts\": 0.308642,\n \"samples_ns\": [ 4573140561, 4514022060, 4475067874 ],\n \"samples_ts\": [ 27.9895, 28.3561, 28.6029 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:44:27Z", "avg_ns": 992457546, "stddev_ns": 165154, "avg_ts": 128.972774, "stddev_ts": 0.021463, "samples_ns": [ 992480818, 992281990, 992609830 ], "samples_ts": [ 128.97, 128.996, 128.953 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:44:31Z", "avg_ns": 4520743498, "stddev_ns": 49380672, "avg_ts": 28.316174, "stddev_ts": 0.308642, "samples_ns": [ 4573140561, 4514022060, 4475067874 ], "samples_ts": [ 27.9895, 28.3561, 28.6029 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 152 }, { "timestamp_utc": "2025-12-08T21:45:45.486557+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:44:45Z\",\n \"avg_ns\": 991256536,\n \"stddev_ns\": 3687146,\n \"avg_ts\": 129.130228,\n \"stddev_ts\": 0.481278,\n \"samples_ns\": [ 993019932, 993730565, 987019113 ],\n \"samples_ts\": [ 128.9, 128.808, 129.683 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:44:49Z\",\n \"avg_ns\": 18554085295,\n \"stddev_ns\": 3063074679,\n \"avg_ts\": 27.603400,\n \"stddev_ts\": 0.586111,\n \"samples_ns\": [ 19014226630, 18307986524, 18340042732 ],\n \"samples_ts\": [ 26.9272, 27.9659, 27.9171 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:44:45Z", "avg_ns": 991256536, "stddev_ns": 3687146, "avg_ts": 129.130228, "stddev_ts": 0.481278, "samples_ns": [ 993019932, 993730565, 987019113 ], "samples_ts": [ 128.9, 128.808, 129.683 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:44:49Z", "avg_ns": 18554085295, "stddev_ns": 3063074679, "avg_ts": 27.6034, "stddev_ts": 0.586111, "samples_ns": [ 19014226630, 18307986524, 18340042732 ], "samples_ts": [ 26.9272, 27.9659, 27.9171 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 153 }, { "timestamp_utc": "2025-12-08T21:46:18.186812+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:45:46Z\",\n \"avg_ns\": 4397510845,\n \"stddev_ns\": 249463850,\n \"avg_ts\": 116.671907,\n \"stddev_ts\": 6.409644,\n \"samples_ns\": [ 4245976687, 4685434144, 4261121704 ],\n \"samples_ts\": [ 120.585, 109.275, 120.156 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:46:03Z\",\n \"avg_ns\": 4848237462,\n \"stddev_ns\": 17196459,\n \"avg_ts\": 26.401569,\n \"stddev_ts\": 0.093835,\n \"samples_ns\": [ 4858493607, 4857834103, 4828384678 ],\n \"samples_ts\": [ 26.3456, 26.3492, 26.5099 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:45:46Z", "avg_ns": 4397510845, "stddev_ns": 249463850, "avg_ts": 116.671907, "stddev_ts": 6.409644, "samples_ns": [ 4245976687, 4685434144, 4261121704 ], "samples_ts": [ 120.585, 109.275, 120.156 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:46:03Z", "avg_ns": 4848237462, "stddev_ns": 17196459, "avg_ts": 26.401569, "stddev_ts": 0.093835, "samples_ns": [ 4858493607, 4857834103, 4828384678 ], "samples_ts": [ 26.3456, 26.3492, 26.5099 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 154 }, { "timestamp_utc": "2025-12-08T21:47:32.242642+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:46:18Z\",\n \"avg_ns\": 4254952396,\n \"stddev_ns\": 10673091,\n \"avg_ts\": 120.330875,\n \"stddev_ts\": 0.301495,\n \"samples_ns\": [ 4251301533, 4246583796, 4266971859 ],\n \"samples_ts\": [ 120.434, 120.568, 119.991 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:46:36Z\",\n \"avg_ns\": 18620284032,\n \"stddev_ns\": 379243439,\n \"avg_ts\": 27.504417,\n \"stddev_ts\": 0.553968,\n \"samples_ns\": [ 19056062368, 18439791382, 18364998347 ],\n \"samples_ts\": [ 26.8681, 27.766, 27.8791 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:46:18Z", "avg_ns": 4254952396, "stddev_ns": 10673091, "avg_ts": 120.330875, "stddev_ts": 0.301495, "samples_ns": [ 4251301533, 4246583796, 4266971859 ], "samples_ts": [ 120.434, 120.568, 119.991 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:46:36Z", "avg_ns": 18620284032, "stddev_ns": 379243439, "avg_ts": 27.504417, "stddev_ts": 0.553968, "samples_ns": [ 19056062368, 18439791382, 18364998347 ], "samples_ts": [ 26.8681, 27.766, 27.8791 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 155 }, { "timestamp_utc": "2025-12-08T21:47:51.105243+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:47:32Z\",\n \"avg_ns\": 990068434,\n \"stddev_ns\": 1613738,\n \"avg_ts\": 129.284222,\n \"stddev_ts\": 0.210849,\n \"samples_ns\": [ 991293932, 990670913, 988240458 ],\n \"samples_ts\": [ 129.124, 129.205, 129.523 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:47:36Z\",\n \"avg_ns\": 4727261380,\n \"stddev_ns\": 361506748,\n \"avg_ts\": 27.178606,\n \"stddev_ts\": 1.993348,\n \"samples_ns\": [ 5143331727, 4490050528, 4548401887 ],\n \"samples_ts\": [ 24.8866, 28.5075, 28.1418 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:47:32Z", "avg_ns": 990068434, "stddev_ns": 1613738, "avg_ts": 129.284222, "stddev_ts": 0.210849, "samples_ns": [ 991293932, 990670913, 988240458 ], "samples_ts": [ 129.124, 129.205, 129.523 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:47:36Z", "avg_ns": 4727261380, "stddev_ns": 361506748, "avg_ts": 27.178606, "stddev_ts": 1.993348, "samples_ns": [ 5143331727, 4490050528, 4548401887 ], "samples_ts": [ 24.8866, 28.5075, 28.1418 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 156 }, { "timestamp_utc": "2025-12-08T21:48:51.258705+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:47:51Z\",\n \"avg_ns\": 990262479,\n \"stddev_ns\": 1381677,\n \"avg_ts\": 129.258827,\n \"stddev_ts\": 0.180317,\n \"samples_ns\": [ 991619357, 988857952, 990310129 ],\n \"samples_ts\": [ 129.082, 129.442, 129.252 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:47:55Z\",\n \"avg_ns\": 18496234806,\n \"stddev_ns\": 89605302,\n \"avg_ts\": 27.681743,\n \"stddev_ts\": 0.134365,\n \"samples_ns\": [ 18569972118, 18522224855, 18396507445 ],\n \"samples_ts\": [ 27.5714, 27.6425, 27.8314 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:47:51Z", "avg_ns": 990262479, "stddev_ns": 1381677, "avg_ts": 129.258827, "stddev_ts": 0.180317, "samples_ns": [ 991619357, 988857952, 990310129 ], "samples_ts": [ 129.082, 129.442, 129.252 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:47:55Z", "avg_ns": 18496234806, "stddev_ns": 89605302, "avg_ts": 27.681743, "stddev_ts": 0.134365, "samples_ns": [ 18569972118, 18522224855, 18396507445 ], "samples_ts": [ 27.5714, 27.6425, 27.8314 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 157 }, { "timestamp_utc": "2025-12-08T21:49:22.131342+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:48:51Z\",\n \"avg_ns\": 4227170487,\n \"stddev_ns\": 261673108,\n \"avg_ts\": 121.420792,\n \"stddev_ts\": 7.259472,\n \"samples_ns\": [ 4064423308, 4088072257, 4529015898 ],\n \"samples_ts\": [ 125.971, 125.242, 113.049 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:49:08Z\",\n \"avg_ns\": 4476731998,\n \"stddev_ns\": 17313626,\n \"avg_ts\": 28.592571,\n \"stddev_ts\": 0.110689,\n \"samples_ns\": [ 4492353653, 4458116792, 4479725549 ],\n \"samples_ts\": [ 28.4929, 28.7117, 28.5732 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:48:51Z", "avg_ns": 4227170487, "stddev_ns": 261673108, "avg_ts": 121.420792, "stddev_ts": 7.259472, "samples_ns": [ 4064423308, 4088072257, 4529015898 ], "samples_ts": [ 125.971, 125.242, 113.049 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:49:08Z", "avg_ns": 4476731998, "stddev_ns": 17313626, "avg_ts": 28.592571, "stddev_ts": 0.110689, "samples_ns": [ 4492353653, 4458116792, 4479725549 ], "samples_ts": [ 28.4929, 28.7117, 28.5732 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 158 }, { "timestamp_utc": "2025-12-08T21:50:35.246565+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:49:22Z\",\n \"avg_ns\": 4080021160,\n \"stddev_ns\": 10631483,\n \"avg_ts\": 125.490112,\n \"stddev_ts\": 0.326543,\n \"samples_ns\": [ 4072327929, 4075583398, 4092152155 ],\n \"samples_ts\": [ 125.727, 125.626, 125.118 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:49:39Z\",\n \"avg_ns\": 18690826970,\n \"stddev_ns\": 380262909,\n \"avg_ts\": 27.400590,\n \"stddev_ts\": 0.550995,\n \"samples_ns\": [ 18466372479, 19129879882, 18476228549 ],\n \"samples_ts\": [ 27.7261, 26.7644, 27.7113 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:49:22Z", "avg_ns": 4080021160, "stddev_ns": 10631483, "avg_ts": 125.490112, "stddev_ts": 0.326543, "samples_ns": [ 4072327929, 4075583398, 4092152155 ], "samples_ts": [ 125.727, 125.626, 125.118 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:49:39Z", "avg_ns": 18690826970, "stddev_ns": 380262909, "avg_ts": 27.40059, "stddev_ts": 0.550995, "samples_ns": [ 18466372479, 19129879882, 18476228549 ], "samples_ts": [ 27.7261, 26.7644, 27.7113 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 159 }, { "timestamp_utc": "2025-12-08T21:50:54.009058+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:50:35Z\",\n \"avg_ns\": 988065624,\n \"stddev_ns\": 1265457,\n \"avg_ts\": 129.546193,\n \"stddev_ts\": 0.165791,\n \"samples_ns\": [ 989456475, 987756910, 986983488 ],\n \"samples_ts\": [ 129.364, 129.587, 129.688 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:50:39Z\",\n \"avg_ns\": 4698087142,\n \"stddev_ns\": 369479415,\n \"avg_ts\": 27.353019,\n \"stddev_ts\": 2.057756,\n \"samples_ns\": [ 4482331300, 5124715901, 4487214226 ],\n \"samples_ts\": [ 28.5566, 24.977, 28.5255 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:50:35Z", "avg_ns": 988065624, "stddev_ns": 1265457, "avg_ts": 129.546193, "stddev_ts": 0.165791, "samples_ns": [ 989456475, 987756910, 986983488 ], "samples_ts": [ 129.364, 129.587, 129.688 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:50:39Z", "avg_ns": 4698087142, "stddev_ns": 369479415, "avg_ts": 27.353019, "stddev_ts": 2.057756, "samples_ns": [ 4482331300, 5124715901, 4487214226 ], "samples_ts": [ 28.5566, 24.977, 28.5255 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 160 }, { "timestamp_utc": "2025-12-08T21:51:54.567194+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:50:54Z\",\n \"avg_ns\": 1138433591,\n \"stddev_ns\": 247901389,\n \"avg_ts\": 115.684635,\n \"stddev_ts\": 22.384379,\n \"samples_ns\": [ 990365296, 1000307578, 1424627899 ],\n \"samples_ts\": [ 129.245, 127.961, 89.848 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:50:59Z\",\n \"avg_ns\": 18461455556,\n \"stddev_ns\": 47512891,\n \"avg_ts\": 27.733581,\n \"stddev_ts\": 0.071470,\n \"samples_ns\": [ 18480849097, 18407313394, 18496204177 ],\n \"samples_ts\": [ 27.7044, 27.815, 27.6814 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:50:54Z", "avg_ns": 1138433591, "stddev_ns": 247901389, "avg_ts": 115.684635, "stddev_ts": 22.384379, "samples_ns": [ 990365296, 1000307578, 1424627899 ], "samples_ts": [ 129.245, 127.961, 89.848 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:50:59Z", "avg_ns": 18461455556, "stddev_ns": 47512891, "avg_ts": 27.733581, "stddev_ts": 0.07147, "samples_ns": [ 18480849097, 18407313394, 18496204177 ], "samples_ts": [ 27.7044, 27.815, 27.6814 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 161 }, { "timestamp_utc": "2025-12-08T21:52:25.476635+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:51:55Z\",\n \"avg_ns\": 4217318536,\n \"stddev_ns\": 247033247,\n \"avg_ts\": 121.673380,\n \"stddev_ts\": 6.894230,\n \"samples_ns\": [ 4078379610, 4502536469, 4071039530 ],\n \"samples_ts\": [ 125.54, 113.714, 125.766 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:52:11Z\",\n \"avg_ns\": 4495041086,\n \"stddev_ns\": 45077436,\n \"avg_ts\": 28.477727,\n \"stddev_ts\": 0.284594,\n \"samples_ns\": [ 4456605686, 4483861316, 4544656258 ],\n \"samples_ts\": [ 28.7214, 28.5468, 28.1649 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:51:55Z", "avg_ns": 4217318536, "stddev_ns": 247033247, "avg_ts": 121.67338, "stddev_ts": 6.89423, "samples_ns": [ 4078379610, 4502536469, 4071039530 ], "samples_ts": [ 125.54, 113.714, 125.766 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:52:11Z", "avg_ns": 4495041086, "stddev_ns": 45077436, "avg_ts": 28.477727, "stddev_ts": 0.284594, "samples_ns": [ 4456605686, 4483861316, 4544656258 ], "samples_ts": [ 28.7214, 28.5468, 28.1649 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 162 }, { "timestamp_utc": "2025-12-08T21:53:43.868931+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:52:26Z\",\n \"avg_ns\": 4233757106,\n \"stddev_ns\": 133380711,\n \"avg_ts\": 121.014274,\n \"stddev_ts\": 3.880397,\n \"samples_ns\": [ 4080418383, 4297943179, 4322909758 ],\n \"samples_ts\": [ 125.477, 119.127, 118.439 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:52:42Z\",\n \"avg_ns\": 20299047242,\n \"stddev_ns\": 317760888,\n \"avg_ts\": 25.227014,\n \"stddev_ts\": 0.398159,\n \"samples_ns\": [ 19936170480, 20527520969, 20433450277 ],\n \"samples_ts\": [ 25.682, 24.9421, 25.057 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:52:26Z", "avg_ns": 4233757106, "stddev_ns": 133380711, "avg_ts": 121.014274, "stddev_ts": 3.880397, "samples_ns": [ 4080418383, 4297943179, 4322909758 ], "samples_ts": [ 125.477, 119.127, 118.439 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:52:42Z", "avg_ns": 20299047242, "stddev_ns": 317760888, "avg_ts": 25.227014, "stddev_ts": 0.398159, "samples_ns": [ 19936170480, 20527520969, 20433450277 ], "samples_ts": [ 25.682, 24.9421, 25.057 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 163 }, { "timestamp_utc": "2025-12-08T21:54:01.988919+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:53:44Z\",\n \"avg_ns\": 990486913,\n \"stddev_ns\": 3921874,\n \"avg_ts\": 129.230718,\n \"stddev_ts\": 0.510531,\n \"samples_ns\": [ 995006554, 988471523, 987982663 ],\n \"samples_ts\": [ 128.642, 129.493, 129.557 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:53:48Z\",\n \"avg_ns\": 4478731012,\n \"stddev_ns\": 29761656,\n \"avg_ts\": 28.580367,\n \"stddev_ts\": 0.190220,\n \"samples_ns\": [ 4505804831, 4446863583, 4483524623 ],\n \"samples_ts\": [ 28.4078, 28.7843, 28.549 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:53:44Z", "avg_ns": 990486913, "stddev_ns": 3921874, "avg_ts": 129.230718, "stddev_ts": 0.510531, "samples_ns": [ 995006554, 988471523, 987982663 ], "samples_ts": [ 128.642, 129.493, 129.557 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:53:48Z", "avg_ns": 4478731012, "stddev_ns": 29761656, "avg_ts": 28.580367, "stddev_ts": 0.19022, "samples_ns": [ 4505804831, 4446863583, 4483524623 ], "samples_ts": [ 28.4078, 28.7843, 28.549 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 164 }, { "timestamp_utc": "2025-12-08T21:55:01.754088+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:54:02Z\",\n \"avg_ns\": 992467405,\n \"stddev_ns\": 3977079,\n \"avg_ts\": 128.972868,\n \"stddev_ts\": 0.515795,\n \"samples_ns\": [ 990895033, 996990287, 989516895 ],\n \"samples_ts\": [ 129.176, 128.386, 129.356 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:54:06Z\",\n \"avg_ns\": 18345583666,\n \"stddev_ns\": 41746731,\n \"avg_ts\": 27.908720,\n \"stddev_ts\": 0.063426,\n \"samples_ns\": [ 18323466877, 18319548841, 18393735281 ],\n \"samples_ts\": [ 27.9423, 27.9483, 27.8356 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:54:02Z", "avg_ns": 992467405, "stddev_ns": 3977079, "avg_ts": 128.972868, "stddev_ts": 0.515795, "samples_ns": [ 990895033, 996990287, 989516895 ], "samples_ts": [ 129.176, 128.386, 129.356 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:54:06Z", "avg_ns": 18345583666, "stddev_ns": 41746731, "avg_ts": 27.90872, "stddev_ts": 0.063426, "samples_ns": [ 18323466877, 18319548841, 18393735281 ], "samples_ts": [ 27.9423, 27.9483, 27.8356 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 165 }, { "timestamp_utc": "2025-12-08T21:55:33.138222+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:55:02Z\",\n \"avg_ns\": 4247850095,\n \"stddev_ns\": 6032759,\n \"avg_ts\": 120.531722,\n \"stddev_ts\": 0.171064,\n \"samples_ns\": [ 4245672481, 4243208484, 4254669320 ],\n \"samples_ts\": [ 120.593, 120.663, 120.338 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:55:19Z\",\n \"avg_ns\": 4539923173,\n \"stddev_ns\": 11930775,\n \"avg_ts\": 28.194439,\n \"stddev_ts\": 0.073995,\n \"samples_ns\": [ 4553499189, 4531108417, 4535161914 ],\n \"samples_ts\": [ 28.1102, 28.2492, 28.2239 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:55:02Z", "avg_ns": 4247850095, "stddev_ns": 6032759, "avg_ts": 120.531722, "stddev_ts": 0.171064, "samples_ns": [ 4245672481, 4243208484, 4254669320 ], "samples_ts": [ 120.593, 120.663, 120.338 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:55:19Z", "avg_ns": 4539923173, "stddev_ns": 11930775, "avg_ts": 28.194439, "stddev_ts": 0.073995, "samples_ns": [ 4553499189, 4531108417, 4535161914 ], "samples_ts": [ 28.1102, 28.2492, 28.2239 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 166 }, { "timestamp_utc": "2025-12-08T21:56:46.478903+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:55:33Z\",\n \"avg_ns\": 4260468379,\n \"stddev_ns\": 8168527,\n \"avg_ts\": 120.174875,\n \"stddev_ts\": 0.230338,\n \"samples_ns\": [ 4259663024, 4252732621, 4269009493 ],\n \"samples_ts\": [ 120.197, 120.393, 119.934 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:55:50Z\",\n \"avg_ns\": 18524957719,\n \"stddev_ns\": 117849135,\n \"avg_ts\": 27.639134,\n \"stddev_ts\": 0.175471,\n \"samples_ns\": [ 18653911828, 18498117709, 18422843620 ],\n \"samples_ts\": [ 27.4473, 27.6785, 27.7916 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:55:33Z", "avg_ns": 4260468379, "stddev_ns": 8168527, "avg_ts": 120.174875, "stddev_ts": 0.230338, "samples_ns": [ 4259663024, 4252732621, 4269009493 ], "samples_ts": [ 120.197, 120.393, 119.934 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:55:50Z", "avg_ns": 18524957719, "stddev_ns": 117849135, "avg_ts": 27.639134, "stddev_ts": 0.175471, "samples_ns": [ 18653911828, 18498117709, 18422843620 ], "samples_ts": [ 27.4473, 27.6785, 27.7916 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 167 }, { "timestamp_utc": "2025-12-08T21:57:06.154054+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:56:47Z\",\n \"avg_ns\": 991683157,\n \"stddev_ns\": 2701772,\n \"avg_ts\": 129.074123,\n \"stddev_ts\": 0.351776,\n \"samples_ns\": [ 991913247, 994262526, 988873698 ],\n \"samples_ts\": [ 129.044, 128.739, 129.44 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:56:51Z\",\n \"avg_ns\": 4990730230,\n \"stddev_ns\": 33410669,\n \"avg_ts\": 25.648313,\n \"stddev_ts\": 0.171137,\n \"samples_ns\": [ 4965771209, 4977733217, 5028686265 ],\n \"samples_ts\": [ 25.7765, 25.7145, 25.454 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:56:47Z", "avg_ns": 991683157, "stddev_ns": 2701772, "avg_ts": 129.074123, "stddev_ts": 0.351776, "samples_ns": [ 991913247, 994262526, 988873698 ], "samples_ts": [ 129.044, 128.739, 129.44 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:56:51Z", "avg_ns": 4990730230, "stddev_ns": 33410669, "avg_ts": 25.648313, "stddev_ts": 0.171137, "samples_ns": [ 4965771209, 4977733217, 5028686265 ], "samples_ts": [ 25.7765, 25.7145, 25.454 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 168 }, { "timestamp_utc": "2025-12-08T21:58:06.912513+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:57:06Z\",\n \"avg_ns\": 990750330,\n \"stddev_ns\": 1654783,\n \"avg_ts\": 129.195251,\n \"stddev_ts\": 0.215794,\n \"samples_ns\": [ 991020426, 988977720, 992252846 ],\n \"samples_ts\": [ 129.16, 129.427, 128.999 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:57:10Z\",\n \"avg_ns\": 18686917356,\n \"stddev_ns\": 596369248,\n \"avg_ts\": 27.417129,\n \"stddev_ts\": 0.859243,\n \"samples_ns\": [ 19375123617, 18321948738, 18363679714 ],\n \"samples_ts\": [ 26.4256, 27.9446, 27.8811 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:57:06Z", "avg_ns": 990750330, "stddev_ns": 1654783, "avg_ts": 129.195251, "stddev_ts": 0.215794, "samples_ns": [ 991020426, 988977720, 992252846 ], "samples_ts": [ 129.16, 129.427, 128.999 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:57:10Z", "avg_ns": 18686917356, "stddev_ns": 596369248, "avg_ts": 27.417129, "stddev_ts": 0.859243, "samples_ns": [ 19375123617, 18321948738, 18363679714 ], "samples_ts": [ 26.4256, 27.9446, 27.8811 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 169 }, { "timestamp_utc": "2025-12-08T21:58:38.019494+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:58:07Z\",\n \"avg_ns\": 4080458484,\n \"stddev_ns\": 19352060,\n \"avg_ts\": 125.477977,\n \"stddev_ts\": 0.594847,\n \"samples_ns\": [ 4079308890, 4100359610, 4061706953 ],\n \"samples_ts\": [ 125.511, 124.867, 126.055 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:58:23Z\",\n \"avg_ns\": 4696428309,\n \"stddev_ns\": 354113382,\n \"avg_ts\": 27.354107,\n \"stddev_ts\": 1.976493,\n \"samples_ns\": [ 4494357442, 5105314049, 4489613436 ],\n \"samples_ts\": [ 28.4802, 25.0719, 28.5102 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:58:07Z", "avg_ns": 4080458484, "stddev_ns": 19352060, "avg_ts": 125.477977, "stddev_ts": 0.594847, "samples_ns": [ 4079308890, 4100359610, 4061706953 ], "samples_ts": [ 125.511, 124.867, 126.055 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:58:23Z", "avg_ns": 4696428309, "stddev_ns": 354113382, "avg_ts": 27.354107, "stddev_ts": 1.976493, "samples_ns": [ 4494357442, 5105314049, 4489613436 ], "samples_ts": [ 28.4802, 25.0719, 28.5102 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 170 }, { "timestamp_utc": "2025-12-08T21:59:52.809160+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:58:38Z\",\n \"avg_ns\": 4068902659,\n \"stddev_ns\": 3281925,\n \"avg_ts\": 125.832507,\n \"stddev_ts\": 0.101504,\n \"samples_ns\": [ 4070699474, 4070892416, 4065116089 ],\n \"samples_ts\": [ 125.777, 125.771, 125.95 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:58:55Z\",\n \"avg_ns\": 19108939497,\n \"stddev_ns\": 373638994,\n \"avg_ts\": 26.800620,\n \"stddev_ts\": 0.527624,\n \"samples_ns\": [ 19199298212, 18698407494, 19429112786 ],\n \"samples_ts\": [ 26.6676, 27.382, 26.3522 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:58:38Z", "avg_ns": 4068902659, "stddev_ns": 3281925, "avg_ts": 125.832507, "stddev_ts": 0.101504, "samples_ns": [ 4070699474, 4070892416, 4065116089 ], "samples_ts": [ 125.777, 125.771, 125.95 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T21:58:55Z", "avg_ns": 19108939497, "stddev_ns": 373638994, "avg_ts": 26.80062, "stddev_ts": 0.527624, "samples_ns": [ 19199298212, 18698407494, 19429112786 ], "samples_ts": [ 26.6676, 27.382, 26.3522 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 171 }, { "timestamp_utc": "2025-12-08T22:00:12.423562+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:59:53Z\",\n \"avg_ns\": 987757609,\n \"stddev_ns\": 1727874,\n \"avg_ts\": 129.586712,\n \"stddev_ts\": 0.226828,\n \"samples_ns\": [ 988351120, 985811499, 989110209 ],\n \"samples_ts\": [ 129.509, 129.842, 129.409 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:59:57Z\",\n \"avg_ns\": 4985937192,\n \"stddev_ns\": 9817185,\n \"avg_ts\": 25.672271,\n \"stddev_ts\": 0.050604,\n \"samples_ns\": [ 4992123725, 4974617915, 4991069937 ],\n \"samples_ts\": [ 25.6404, 25.7306, 25.6458 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T21:59:53Z", "avg_ns": 987757609, "stddev_ns": 1727874, "avg_ts": 129.586712, "stddev_ts": 0.226828, "samples_ns": [ 988351120, 985811499, 989110209 ], "samples_ts": [ 129.509, 129.842, 129.409 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T21:59:57Z", "avg_ns": 4985937192, "stddev_ns": 9817185, "avg_ts": 25.672271, "stddev_ts": 0.050604, "samples_ns": [ 4992123725, 4974617915, 4991069937 ], "samples_ts": [ 25.6404, 25.7306, 25.6458 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 172 }, { "timestamp_utc": "2025-12-08T22:01:12.966766+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:00:13Z\",\n \"avg_ns\": 995528362,\n \"stddev_ns\": 108860,\n \"avg_ts\": 128.574942,\n \"stddev_ts\": 0.014059,\n \"samples_ns\": [ 995653745, 995473406, 995457935 ],\n \"samples_ts\": [ 128.559, 128.582, 128.584 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:00:17Z\",\n \"avg_ns\": 18614559063,\n \"stddev_ns\": 342378384,\n \"avg_ts\": 27.511517,\n \"stddev_ts\": 0.502815,\n \"samples_ns\": [ 18528726099, 18991687282, 18323263808 ],\n \"samples_ts\": [ 27.6328, 26.9592, 27.9426 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:00:13Z", "avg_ns": 995528362, "stddev_ns": 108860, "avg_ts": 128.574942, "stddev_ts": 0.014059, "samples_ns": [ 995653745, 995473406, 995457935 ], "samples_ts": [ 128.559, 128.582, 128.584 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:00:17Z", "avg_ns": 18614559063, "stddev_ns": 342378384, "avg_ts": 27.511517, "stddev_ts": 0.502815, "samples_ns": [ 18528726099, 18991687282, 18323263808 ], "samples_ts": [ 27.6328, 26.9592, 27.9426 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 173 }, { "timestamp_utc": "2025-12-08T22:01:43.363773+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:01:13Z\",\n \"avg_ns\": 4064320101,\n \"stddev_ns\": 3083866,\n \"avg_ts\": 125.974378,\n \"stddev_ts\": 0.095586,\n \"samples_ns\": [ 4060760759, 4066120100, 4066079446 ],\n \"samples_ts\": [ 126.085, 125.919, 125.92 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:01:29Z\",\n \"avg_ns\": 4484813587,\n \"stddev_ns\": 14227647,\n \"avg_ts\": 28.540954,\n \"stddev_ts\": 0.090695,\n \"samples_ns\": [ 4494924542, 4490972047, 4468544172 ],\n \"samples_ts\": [ 28.4766, 28.5016, 28.6447 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:01:13Z", "avg_ns": 4064320101, "stddev_ns": 3083866, "avg_ts": 125.974378, "stddev_ts": 0.095586, "samples_ns": [ 4060760759, 4066120100, 4066079446 ], "samples_ts": [ 126.085, 125.919, 125.92 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:01:29Z", "avg_ns": 4484813587, "stddev_ns": 14227647, "avg_ts": 28.540954, "stddev_ts": 0.090695, "samples_ns": [ 4494924542, 4490972047, 4468544172 ], "samples_ts": [ 28.4766, 28.5016, 28.6447 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 174 }, { "timestamp_utc": "2025-12-08T22:02:59.826574+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:01:43Z\",\n \"avg_ns\": 4055325639,\n \"stddev_ns\": 9086677,\n \"avg_ts\": 126.254155,\n \"stddev_ts\": 0.282523,\n \"samples_ns\": [ 4049988936, 4050170734, 4065817248 ],\n \"samples_ts\": [ 126.42, 126.414, 125.928 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:02:00Z\",\n \"avg_ns\": 19835755012,\n \"stddev_ns\": 108456066,\n \"avg_ts\": 25.812489,\n \"stddev_ts\": 0.141108,\n \"samples_ns\": [ 19728672782, 19833057659, 19945534595 ],\n \"samples_ts\": [ 25.9521, 25.8155, 25.6699 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:01:43Z", "avg_ns": 4055325639, "stddev_ns": 9086677, "avg_ts": 126.254155, "stddev_ts": 0.282523, "samples_ns": [ 4049988936, 4050170734, 4065817248 ], "samples_ts": [ 126.42, 126.414, 125.928 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:02:00Z", "avg_ns": 19835755012, "stddev_ns": 108456066, "avg_ts": 25.812489, "stddev_ts": 0.141108, "samples_ns": [ 19728672782, 19833057659, 19945534595 ], "samples_ts": [ 25.9521, 25.8155, 25.6699 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 175 }, { "timestamp_utc": "2025-12-08T22:03:20.014380+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:03:00Z\",\n \"avg_ns\": 994815326,\n \"stddev_ns\": 2869471,\n \"avg_ts\": 128.667811,\n \"stddev_ts\": 0.371511,\n \"samples_ns\": [ 995637042, 991624997, 997183941 ],\n \"samples_ts\": [ 128.561, 129.081, 128.361 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:03:04Z\",\n \"avg_ns\": 5166053292,\n \"stddev_ns\": 374677890,\n \"avg_ts\": 24.860814,\n \"stddev_ts\": 1.730649,\n \"samples_ns\": [ 4945864086, 4953624957, 5598670835 ],\n \"samples_ts\": [ 25.8802, 25.8397, 22.8626 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:03:00Z", "avg_ns": 994815326, "stddev_ns": 2869471, "avg_ts": 128.667811, "stddev_ts": 0.371511, "samples_ns": [ 995637042, 991624997, 997183941 ], "samples_ts": [ 128.561, 129.081, 128.361 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:03:04Z", "avg_ns": 5166053292, "stddev_ns": 374677890, "avg_ts": 24.860814, "stddev_ts": 1.730649, "samples_ns": [ 4945864086, 4953624957, 5598670835 ], "samples_ts": [ 25.8802, 25.8397, 22.8626 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 176 }, { "timestamp_utc": "2025-12-08T22:04:21.116564+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:03:20Z\",\n \"avg_ns\": 985646090,\n \"stddev_ns\": 1006512,\n \"avg_ts\": 129.864147,\n \"stddev_ts\": 0.132591,\n \"samples_ns\": [ 985533107, 984700836, 986704327 ],\n \"samples_ts\": [ 129.879, 129.989, 129.725 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:03:24Z\",\n \"avg_ns\": 18810078414,\n \"stddev_ns\": 373935388,\n \"avg_ts\": 27.226707,\n \"stddev_ts\": 0.547498,\n \"samples_ns\": [ 19040141773, 18378612203, 19011481266 ],\n \"samples_ts\": [ 26.8906, 27.8585, 26.9311 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:03:20Z", "avg_ns": 985646090, "stddev_ns": 1006512, "avg_ts": 129.864147, "stddev_ts": 0.132591, "samples_ns": [ 985533107, 984700836, 986704327 ], "samples_ts": [ 129.879, 129.989, 129.725 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:03:24Z", "avg_ns": 18810078414, "stddev_ns": 373935388, "avg_ts": 27.226707, "stddev_ts": 0.547498, "samples_ns": [ 19040141773, 18378612203, 19011481266 ], "samples_ts": [ 26.8906, 27.8585, 26.9311 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 177 }, { "timestamp_utc": "2025-12-08T22:04:52.383807+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:04:21Z\",\n \"avg_ns\": 4256997797,\n \"stddev_ns\": 9064113,\n \"avg_ts\": 120.272918,\n \"stddev_ts\": 0.256008,\n \"samples_ns\": [ 4256091033, 4248421146, 4266481212 ],\n \"samples_ts\": [ 120.298, 120.515, 120.005 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:04:38Z\",\n \"avg_ns\": 4507838337,\n \"stddev_ns\": 28928468,\n \"avg_ts\": 28.395764,\n \"stddev_ts\": 0.182094,\n \"samples_ns\": [ 4537831068, 4505577094, 4480106849 ],\n \"samples_ts\": [ 28.2073, 28.4092, 28.5707 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:04:21Z", "avg_ns": 4256997797, "stddev_ns": 9064113, "avg_ts": 120.272918, "stddev_ts": 0.256008, "samples_ns": [ 4256091033, 4248421146, 4266481212 ], "samples_ts": [ 120.298, 120.515, 120.005 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:04:38Z", "avg_ns": 4507838337, "stddev_ns": 28928468, "avg_ts": 28.395764, "stddev_ts": 0.182094, "samples_ns": [ 4537831068, 4505577094, 4480106849 ], "samples_ts": [ 28.2073, 28.4092, 28.5707 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 178 }, { "timestamp_utc": "2025-12-08T22:06:05.662682+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:04:53Z\",\n \"avg_ns\": 4264136118,\n \"stddev_ns\": 9295088,\n \"avg_ts\": 120.071594,\n \"stddev_ts\": 0.261657,\n \"samples_ns\": [ 4255265081, 4273803551, 4263339723 ],\n \"samples_ts\": [ 120.322, 119.8, 120.094 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:05:10Z\",\n \"avg_ns\": 18497440796,\n \"stddev_ns\": 64442258,\n \"avg_ts\": 27.679729,\n \"stddev_ts\": 0.096424,\n \"samples_ns\": [ 18562444191, 18433574729, 18496303468 ],\n \"samples_ts\": [ 27.5826, 27.7754, 27.6812 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:04:53Z", "avg_ns": 4264136118, "stddev_ns": 9295088, "avg_ts": 120.071594, "stddev_ts": 0.261657, "samples_ns": [ 4255265081, 4273803551, 4263339723 ], "samples_ts": [ 120.322, 119.8, 120.094 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:05:10Z", "avg_ns": 18497440796, "stddev_ns": 64442258, "avg_ts": 27.679729, "stddev_ts": 0.096424, "samples_ns": [ 18562444191, 18433574729, 18496303468 ], "samples_ts": [ 27.5826, 27.7754, 27.6812 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 179 }, { "timestamp_utc": "2025-12-08T22:06:16.200888+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:06:06Z\",\n \"avg_ns\": 522332810,\n \"stddev_ns\": 2610886,\n \"avg_ts\": 245.058559,\n \"stddev_ts\": 1.221536,\n \"samples_ns\": [ 521148196, 520524452, 525325784 ],\n \"samples_ts\": [ 245.612, 245.906, 243.658 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:06:08Z\",\n \"avg_ns\": 2582138987,\n \"stddev_ns\": 5941397,\n \"avg_ts\": 49.571480,\n \"stddev_ts\": 0.114153,\n \"samples_ns\": [ 2575557231, 2587104798, 2583754934 ],\n \"samples_ts\": [ 49.698, 49.4762, 49.5403 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:06:06Z", "avg_ns": 522332810, "stddev_ns": 2610886, "avg_ts": 245.058559, "stddev_ts": 1.221536, "samples_ns": [ 521148196, 520524452, 525325784 ], "samples_ts": [ 245.612, 245.906, 243.658 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:06:08Z", "avg_ns": 2582138987, "stddev_ns": 5941397, "avg_ts": 49.57148, "stddev_ts": 0.114153, "samples_ns": [ 2575557231, 2587104798, 2583754934 ], "samples_ts": [ 49.698, 49.4762, 49.5403 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 180 }, { "timestamp_utc": "2025-12-08T22:06:51.069316+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:06:16Z\",\n \"avg_ns\": 523769919,\n \"stddev_ns\": 808945,\n \"avg_ts\": 244.382503,\n \"stddev_ts\": 0.377716,\n \"samples_ns\": [ 522855452, 524062214, 524392091 ],\n \"samples_ts\": [ 244.81, 244.246, 244.092 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:06:18Z\",\n \"avg_ns\": 10691751347,\n \"stddev_ns\": 31813669,\n \"avg_ts\": 47.887667,\n \"stddev_ts\": 0.142569,\n \"samples_ns\": [ 10695658899, 10721430747, 10658164395 ],\n \"samples_ts\": [ 47.8699, 47.7548, 48.0383 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:06:16Z", "avg_ns": 523769919, "stddev_ns": 808945, "avg_ts": 244.382503, "stddev_ts": 0.377716, "samples_ns": [ 522855452, 524062214, 524392091 ], "samples_ts": [ 244.81, 244.246, 244.092 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:06:18Z", "avg_ns": 10691751347, "stddev_ns": 31813669, "avg_ts": 47.887667, "stddev_ts": 0.142569, "samples_ns": [ 10695658899, 10721430747, 10658164395 ], "samples_ts": [ 47.8699, 47.7548, 48.0383 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 181 }, { "timestamp_utc": "2025-12-08T22:07:08.691092+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:06:51Z\",\n \"avg_ns\": 2126626660,\n \"stddev_ns\": 5119544,\n \"avg_ts\": 240.757811,\n \"stddev_ts\": 0.579875,\n \"samples_ns\": [ 2121165851, 2131317238, 2127396892 ],\n \"samples_ts\": [ 241.377, 240.227, 240.67 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:07:00Z\",\n \"avg_ns\": 2806431473,\n \"stddev_ns\": 10212573,\n \"avg_ts\": 45.609926,\n \"stddev_ts\": 0.166159,\n \"samples_ns\": [ 2815347294, 2795289840, 2808657287 ],\n \"samples_ts\": [ 45.4651, 45.7913, 45.5734 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:06:51Z", "avg_ns": 2126626660, "stddev_ns": 5119544, "avg_ts": 240.757811, "stddev_ts": 0.579875, "samples_ns": [ 2121165851, 2131317238, 2127396892 ], "samples_ts": [ 241.377, 240.227, 240.67 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:07:00Z", "avg_ns": 2806431473, "stddev_ns": 10212573, "avg_ts": 45.609926, "stddev_ts": 0.166159, "samples_ns": [ 2815347294, 2795289840, 2808657287 ], "samples_ts": [ 45.4651, 45.7913, 45.5734 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 182 }, { "timestamp_utc": "2025-12-08T22:07:49.560044+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:07:09Z\",\n \"avg_ns\": 2119209452,\n \"stddev_ns\": 9415914,\n \"avg_ts\": 241.602698,\n \"stddev_ts\": 1.070715,\n \"samples_ns\": [ 2113922722, 2113625109, 2130080526 ],\n \"samples_ts\": [ 242.204, 242.238, 240.366 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:07:17Z\",\n \"avg_ns\": 10553540679,\n \"stddev_ns\": 60341581,\n \"avg_ts\": 48.515584,\n \"stddev_ts\": 0.278111,\n \"samples_ns\": [ 10601049205, 10573926097, 10485646735 ],\n \"samples_ts\": [ 48.2971, 48.421, 48.8287 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:07:09Z", "avg_ns": 2119209452, "stddev_ns": 9415914, "avg_ts": 241.602698, "stddev_ts": 1.070715, "samples_ns": [ 2113922722, 2113625109, 2130080526 ], "samples_ts": [ 242.204, 242.238, 240.366 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:07:17Z", "avg_ns": 10553540679, "stddev_ns": 60341581, "avg_ts": 48.515584, "stddev_ts": 0.278111, "samples_ns": [ 10601049205, 10573926097, 10485646735 ], "samples_ts": [ 48.2971, 48.421, 48.8287 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 183 }, { "timestamp_utc": "2025-12-08T22:08:00.646594+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:07:50Z\",\n \"avg_ns\": 520755462,\n \"stddev_ns\": 2033150,\n \"avg_ts\": 245.799241,\n \"stddev_ts\": 0.957380,\n \"samples_ns\": [ 519603132, 519560538, 523102718 ],\n \"samples_ts\": [ 246.342, 246.362, 244.694 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:07:52Z\",\n \"avg_ns\": 2769186867,\n \"stddev_ns\": 350278781,\n \"avg_ts\": 46.687184,\n \"stddev_ts\": 5.505177,\n \"samples_ns\": [ 3173502981, 2576598810, 2557458810 ],\n \"samples_ts\": [ 40.334, 49.6779, 50.0497 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:07:50Z", "avg_ns": 520755462, "stddev_ns": 2033150, "avg_ts": 245.799241, "stddev_ts": 0.95738, "samples_ns": [ 519603132, 519560538, 523102718 ], "samples_ts": [ 246.342, 246.362, 244.694 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:07:52Z", "avg_ns": 2769186867, "stddev_ns": 350278781, "avg_ts": 46.687184, "stddev_ts": 5.505177, "samples_ns": [ 3173502981, 2576598810, 2557458810 ], "samples_ts": [ 40.334, 49.6779, 50.0497 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 184 }, { "timestamp_utc": "2025-12-08T22:08:35.142428+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:08:01Z\",\n \"avg_ns\": 522432009,\n \"stddev_ns\": 1069641,\n \"avg_ts\": 245.008642,\n \"stddev_ts\": 0.501188,\n \"samples_ns\": [ 523602886, 521506872, 522186270 ],\n \"samples_ts\": [ 244.46, 245.443, 245.123 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:08:03Z\",\n \"avg_ns\": 10561823622,\n \"stddev_ns\": 22499990,\n \"avg_ts\": 48.476624,\n \"stddev_ts\": 0.103183,\n \"samples_ns\": [ 10555307340, 10586862560, 10543300966 ],\n \"samples_ts\": [ 48.5064, 48.3618, 48.5616 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:08:01Z", "avg_ns": 522432009, "stddev_ns": 1069641, "avg_ts": 245.008642, "stddev_ts": 0.501188, "samples_ns": [ 523602886, 521506872, 522186270 ], "samples_ts": [ 244.46, 245.443, 245.123 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:08:03Z", "avg_ns": 10561823622, "stddev_ns": 22499990, "avg_ts": 48.476624, "stddev_ts": 0.103183, "samples_ns": [ 10555307340, 10586862560, 10543300966 ], "samples_ts": [ 48.5064, 48.3618, 48.5616 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 185 }, { "timestamp_utc": "2025-12-08T22:08:52.747265+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:08:35Z\",\n \"avg_ns\": 2112212402,\n \"stddev_ns\": 1093254,\n \"avg_ts\": 242.399908,\n \"stddev_ts\": 0.125472,\n \"samples_ns\": [ 2113254005, 2112309259, 2111073942 ],\n \"samples_ts\": [ 242.28, 242.389, 242.531 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:08:44Z\",\n \"avg_ns\": 2811355500,\n \"stddev_ns\": 357597755,\n \"avg_ts\": 45.991795,\n \"stddev_ts\": 5.450080,\n \"samples_ns\": [ 2608849642, 2600968111, 3224248747 ],\n \"samples_ts\": [ 49.0638, 49.2124, 39.6992 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:08:35Z", "avg_ns": 2112212402, "stddev_ns": 1093254, "avg_ts": 242.399908, "stddev_ts": 0.125472, "samples_ns": [ 2113254005, 2112309259, 2111073942 ], "samples_ts": [ 242.28, 242.389, 242.531 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:08:44Z", "avg_ns": 2811355500, "stddev_ns": 357597755, "avg_ts": 45.991795, "stddev_ts": 5.45008, "samples_ns": [ 2608849642, 2600968111, 3224248747 ], "samples_ts": [ 49.0638, 49.2124, 39.6992 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 186 }, { "timestamp_utc": "2025-12-08T22:09:37.127322+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:08:53Z\",\n \"avg_ns\": 2294705926,\n \"stddev_ns\": 314253900,\n \"avg_ts\": 225.738026,\n \"stddev_ts\": 28.650796,\n \"samples_ns\": [ 2117081251, 2109487946, 2657548583 ],\n \"samples_ts\": [ 241.842, 242.713, 192.659 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:09:02Z\",\n \"avg_ns\": 11552143893,\n \"stddev_ns\": 512986570,\n \"avg_ts\": 44.378850,\n \"stddev_ts\": 1.962252,\n \"samples_ns\": [ 11060736776, 11511413352, 12084281552 ],\n \"samples_ts\": [ 46.2899, 44.4776, 42.3691 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:08:53Z", "avg_ns": 2294705926, "stddev_ns": 314253900, "avg_ts": 225.738026, "stddev_ts": 28.650796, "samples_ns": [ 2117081251, 2109487946, 2657548583 ], "samples_ts": [ 241.842, 242.713, 192.659 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:09:02Z", "avg_ns": 11552143893, "stddev_ns": 512986570, "avg_ts": 44.37885, "stddev_ts": 1.962252, "samples_ns": [ 11060736776, 11511413352, 12084281552 ], "samples_ts": [ 46.2899, 44.4776, 42.3691 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 187 }, { "timestamp_utc": "2025-12-08T22:09:47.663632+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:09:37Z\",\n \"avg_ns\": 520523274,\n \"stddev_ns\": 270532,\n \"avg_ts\": 245.906435,\n \"stddev_ts\": 0.126929,\n \"samples_ns\": [ 520648661, 520706255, 520214908 ],\n \"samples_ts\": [ 245.847, 245.82, 246.052 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:09:39Z\",\n \"avg_ns\": 2584524374,\n \"stddev_ns\": 24484347,\n \"avg_ts\": 49.528512,\n \"stddev_ts\": 0.468491,\n \"samples_ns\": [ 2561520974, 2610260397, 2581791751 ],\n \"samples_ts\": [ 49.9703, 49.0373, 49.578 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:09:37Z", "avg_ns": 520523274, "stddev_ns": 270532, "avg_ts": 245.906435, "stddev_ts": 0.126929, "samples_ns": [ 520648661, 520706255, 520214908 ], "samples_ts": [ 245.847, 245.82, 246.052 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:09:39Z", "avg_ns": 2584524374, "stddev_ns": 24484347, "avg_ts": 49.528512, "stddev_ts": 0.468491, "samples_ns": [ 2561520974, 2610260397, 2581791751 ], "samples_ts": [ 49.9703, 49.0373, 49.578 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 188 }, { "timestamp_utc": "2025-12-08T22:10:22.233131+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:09:48Z\",\n \"avg_ns\": 520506024,\n \"stddev_ns\": 476499,\n \"avg_ts\": 245.914677,\n \"stddev_ts\": 0.224595,\n \"samples_ns\": [ 520040080, 520990359, 520487635 ],\n \"samples_ts\": [ 246.135, 245.686, 245.923 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:09:50Z\",\n \"avg_ns\": 10599778857,\n \"stddev_ns\": 44390697,\n \"avg_ts\": 48.303458,\n \"stddev_ts\": 0.201803,\n \"samples_ns\": [ 10573120235, 10651022841, 10575193495 ],\n \"samples_ts\": [ 48.4247, 48.0705, 48.4152 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:09:48Z", "avg_ns": 520506024, "stddev_ns": 476499, "avg_ts": 245.914677, "stddev_ts": 0.224595, "samples_ns": [ 520040080, 520990359, 520487635 ], "samples_ts": [ 246.135, 245.686, 245.923 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:09:50Z", "avg_ns": 10599778857, "stddev_ns": 44390697, "avg_ts": 48.303458, "stddev_ts": 0.201803, "samples_ns": [ 10573120235, 10651022841, 10575193495 ], "samples_ts": [ 48.4247, 48.0705, 48.4152 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 189 }, { "timestamp_utc": "2025-12-08T22:10:39.530476+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:10:22Z\",\n \"avg_ns\": 2206626171,\n \"stddev_ns\": 2116161,\n \"avg_ts\": 232.028569,\n \"stddev_ts\": 0.222522,\n \"samples_ns\": [ 2204200717, 2208087099, 2207590699 ],\n \"samples_ts\": [ 232.284, 231.875, 231.927 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:10:31Z\",\n \"avg_ns\": 2586505541,\n \"stddev_ns\": 17264622,\n \"avg_ts\": 49.489086,\n \"stddev_ts\": 0.329672,\n \"samples_ns\": [ 2605300456, 2582863775, 2571352393 ],\n \"samples_ts\": [ 49.1306, 49.5574, 49.7793 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:10:22Z", "avg_ns": 2206626171, "stddev_ns": 2116161, "avg_ts": 232.028569, "stddev_ts": 0.222522, "samples_ns": [ 2204200717, 2208087099, 2207590699 ], "samples_ts": [ 232.284, 231.875, 231.927 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:10:31Z", "avg_ns": 2586505541, "stddev_ns": 17264622, "avg_ts": 49.489086, "stddev_ts": 0.329672, "samples_ns": [ 2605300456, 2582863775, 2571352393 ], "samples_ts": [ 49.1306, 49.5574, 49.7793 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 190 }, { "timestamp_utc": "2025-12-08T22:11:22.201262+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:10:40Z\",\n \"avg_ns\": 2250490048,\n \"stddev_ns\": 80265943,\n \"avg_ts\": 227.695204,\n \"stddev_ts\": 7.957137,\n \"samples_ns\": [ 2204528513, 2343172108, 2203769525 ],\n \"samples_ts\": [ 232.249, 218.507, 232.329 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:10:49Z\",\n \"avg_ns\": 11002191084,\n \"stddev_ns\": 321977687,\n \"avg_ts\": 46.563222,\n \"stddev_ts\": 1.385787,\n \"samples_ns\": [ 11170375062, 10630948924, 11205249268 ],\n \"samples_ts\": [ 45.8355, 48.1613, 45.6929 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:10:40Z", "avg_ns": 2250490048, "stddev_ns": 80265943, "avg_ts": 227.695204, "stddev_ts": 7.957137, "samples_ns": [ 2204528513, 2343172108, 2203769525 ], "samples_ts": [ 232.249, 218.507, 232.329 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:10:49Z", "avg_ns": 11002191084, "stddev_ns": 321977687, "avg_ts": 46.563222, "stddev_ts": 1.385787, "samples_ns": [ 11170375062, 10630948924, 11205249268 ], "samples_ts": [ 45.8355, 48.1613, 45.6929 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 191 }, { "timestamp_utc": "2025-12-08T22:11:32.724536+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:11:22Z\",\n \"avg_ns\": 521811417,\n \"stddev_ns\": 1830795,\n \"avg_ts\": 245.301360,\n \"stddev_ts\": 0.860728,\n \"samples_ns\": [ 519953129, 523613124, 521867999 ],\n \"samples_ts\": [ 246.176, 244.455, 245.273 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:11:24Z\",\n \"avg_ns\": 2572979736,\n \"stddev_ns\": 2758317,\n \"avg_ts\": 49.747806,\n \"stddev_ts\": 0.053280,\n \"samples_ns\": [ 2571385105, 2576163689, 2571390416 ],\n \"samples_ts\": [ 49.7786, 49.6863, 49.7785 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:11:22Z", "avg_ns": 521811417, "stddev_ns": 1830795, "avg_ts": 245.30136, "stddev_ts": 0.860728, "samples_ns": [ 519953129, 523613124, 521867999 ], "samples_ts": [ 246.176, 244.455, 245.273 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:11:24Z", "avg_ns": 2572979736, "stddev_ns": 2758317, "avg_ts": 49.747806, "stddev_ts": 0.05328, "samples_ns": [ 2571385105, 2576163689, 2571390416 ], "samples_ts": [ 49.7786, 49.6863, 49.7785 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 192 }, { "timestamp_utc": "2025-12-08T22:12:10.487648+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:11:33Z\",\n \"avg_ns\": 527644593,\n \"stddev_ns\": 7696940,\n \"avg_ts\": 242.621683,\n \"stddev_ts\": 3.511686,\n \"samples_ns\": [ 524181688, 522287385, 536464706 ],\n \"samples_ts\": [ 244.19, 245.076, 238.599 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:11:35Z\",\n \"avg_ns\": 11651127238,\n \"stddev_ns\": 319030946,\n \"avg_ts\": 43.966025,\n \"stddev_ts\": 1.193195,\n \"samples_ns\": [ 11575364599, 11376797492, 12001219624 ],\n \"samples_ts\": [ 44.2319, 45.0039, 42.6623 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:11:33Z", "avg_ns": 527644593, "stddev_ns": 7696940, "avg_ts": 242.621683, "stddev_ts": 3.511686, "samples_ns": [ 524181688, 522287385, 536464706 ], "samples_ts": [ 244.19, 245.076, 238.599 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:11:35Z", "avg_ns": 11651127238, "stddev_ns": 319030946, "avg_ts": 43.966025, "stddev_ts": 1.193195, "samples_ns": [ 11575364599, 11376797492, 12001219624 ], "samples_ts": [ 44.2319, 45.0039, 42.6623 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 193 }, { "timestamp_utc": "2025-12-08T22:12:27.848761+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:12:11Z\",\n \"avg_ns\": 2269775903,\n \"stddev_ns\": 267065290,\n \"avg_ts\": 227.539423,\n \"stddev_ts\": 25.069716,\n \"samples_ns\": [ 2578154454, 2116520144, 2114653111 ],\n \"samples_ts\": [ 198.592, 241.907, 242.12 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:12:20Z\",\n \"avg_ns\": 2577707933,\n \"stddev_ns\": 5455495,\n \"avg_ts\": 49.656666,\n \"stddev_ts\": 0.105090,\n \"samples_ns\": [ 2577712004, 2583161156, 2572250640 ],\n \"samples_ts\": [ 49.6564, 49.5517, 49.7619 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:12:11Z", "avg_ns": 2269775903, "stddev_ns": 267065290, "avg_ts": 227.539423, "stddev_ts": 25.069716, "samples_ns": [ 2578154454, 2116520144, 2114653111 ], "samples_ts": [ 198.592, 241.907, 242.12 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:12:20Z", "avg_ns": 2577707933, "stddev_ns": 5455495, "avg_ts": 49.656666, "stddev_ts": 0.10509, "samples_ns": [ 2577712004, 2583161156, 2572250640 ], "samples_ts": [ 49.6564, 49.5517, 49.7619 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 194 }, { "timestamp_utc": "2025-12-08T22:13:09.837040+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:12:28Z\",\n \"avg_ns\": 2118319257,\n \"stddev_ns\": 5949717,\n \"avg_ts\": 241.702325,\n \"stddev_ts\": 0.678372,\n \"samples_ns\": [ 2112921349, 2117338288, 2124698136 ],\n \"samples_ts\": [ 242.319, 241.813, 240.975 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:12:36Z\",\n \"avg_ns\": 10939775520,\n \"stddev_ns\": 326718523,\n \"avg_ts\": 46.830025,\n \"stddev_ts\": 1.423077,\n \"samples_ns\": [ 11135349729, 11121377087, 10562599745 ],\n \"samples_ts\": [ 45.9797, 46.0375, 48.4729 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:12:28Z", "avg_ns": 2118319257, "stddev_ns": 5949717, "avg_ts": 241.702325, "stddev_ts": 0.678372, "samples_ns": [ 2112921349, 2117338288, 2124698136 ], "samples_ts": [ 242.319, 241.813, 240.975 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:12:36Z", "avg_ns": 10939775520, "stddev_ns": 326718523, "avg_ts": 46.830025, "stddev_ts": 1.423077, "samples_ns": [ 11135349729, 11121377087, 10562599745 ], "samples_ts": [ 45.9797, 46.0375, 48.4729 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 195 }, { "timestamp_utc": "2025-12-08T22:13:20.887482+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:13:10Z\",\n \"avg_ns\": 687689746,\n \"stddev_ns\": 203179161,\n \"avg_ts\": 196.441762,\n \"stddev_ts\": 52.814274,\n \"samples_ns\": [ 523500576, 624652635, 914916027 ],\n \"samples_ts\": [ 244.508, 204.914, 139.904 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:13:13Z\",\n \"avg_ns\": 2587776891,\n \"stddev_ns\": 1148173,\n \"avg_ts\": 49.463312,\n \"stddev_ts\": 0.021902,\n \"samples_ns\": [ 2587676326, 2586684571, 2588969778 ],\n \"samples_ts\": [ 49.4652, 49.4842, 49.4405 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:13:10Z", "avg_ns": 687689746, "stddev_ns": 203179161, "avg_ts": 196.441762, "stddev_ts": 52.814274, "samples_ns": [ 523500576, 624652635, 914916027 ], "samples_ts": [ 244.508, 204.914, 139.904 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:13:13Z", "avg_ns": 2587776891, "stddev_ns": 1148173, "avg_ts": 49.463312, "stddev_ts": 0.021902, "samples_ns": [ 2587676326, 2586684571, 2588969778 ], "samples_ts": [ 49.4652, 49.4842, 49.4405 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 196 }, { "timestamp_utc": "2025-12-08T22:13:56.991958+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:13:21Z\",\n \"avg_ns\": 524719451,\n \"stddev_ns\": 593836,\n \"avg_ts\": 243.940088,\n \"stddev_ts\": 0.275896,\n \"samples_ns\": [ 524336947, 525403562, 524417844 ],\n \"samples_ts\": [ 244.118, 243.622, 244.08 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:13:23Z\",\n \"avg_ns\": 11099411288,\n \"stddev_ns\": 351009241,\n \"avg_ts\": 46.159918,\n \"stddev_ts\": 1.486778,\n \"samples_ns\": [ 11289969051, 11313927954, 10694336860 ],\n \"samples_ts\": [ 45.35, 45.254, 47.8758 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:13:21Z", "avg_ns": 524719451, "stddev_ns": 593836, "avg_ts": 243.940088, "stddev_ts": 0.275896, "samples_ns": [ 524336947, 525403562, 524417844 ], "samples_ts": [ 244.118, 243.622, 244.08 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:13:23Z", "avg_ns": 11099411288, "stddev_ns": 351009241, "avg_ts": 46.159918, "stddev_ts": 1.486778, "samples_ns": [ 11289969051, 11313927954, 10694336860 ], "samples_ts": [ 45.35, 45.254, 47.8758 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 197 }, { "timestamp_utc": "2025-12-08T22:14:14.442135+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:13:57Z\",\n \"avg_ns\": 2122864005,\n \"stddev_ns\": 2476525,\n \"avg_ts\": 241.183827,\n \"stddev_ts\": 0.281321,\n \"samples_ns\": [ 2125326805, 2122890389, 2120374822 ],\n \"samples_ts\": [ 240.904, 241.181, 241.467 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:14:06Z\",\n \"avg_ns\": 2597972795,\n \"stddev_ns\": 4052348,\n \"avg_ts\": 49.269264,\n \"stddev_ts\": 0.076802,\n \"samples_ns\": [ 2596772928, 2594655872, 2602489585 ],\n \"samples_ts\": [ 49.2919, 49.3322, 49.1837 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:13:57Z", "avg_ns": 2122864005, "stddev_ns": 2476525, "avg_ts": 241.183827, "stddev_ts": 0.281321, "samples_ns": [ 2125326805, 2122890389, 2120374822 ], "samples_ts": [ 240.904, 241.181, 241.467 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:14:06Z", "avg_ns": 2597972795, "stddev_ns": 4052348, "avg_ts": 49.269264, "stddev_ts": 0.076802, "samples_ns": [ 2596772928, 2594655872, 2602489585 ], "samples_ts": [ 49.2919, 49.3322, 49.1837 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 198 }, { "timestamp_utc": "2025-12-08T22:14:56.203588+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:14:15Z\",\n \"avg_ns\": 2118129834,\n \"stddev_ns\": 6941662,\n \"avg_ts\": 241.724400,\n \"stddev_ts\": 0.791723,\n \"samples_ns\": [ 2111657979, 2117270277, 2125461246 ],\n \"samples_ts\": [ 242.464, 241.821, 240.889 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:14:23Z\",\n \"avg_ns\": 10862800449,\n \"stddev_ns\": 387977400,\n \"avg_ts\": 47.172640,\n \"stddev_ts\": 1.650785,\n \"samples_ns\": [ 10638898851, 11310798116, 10638704382 ],\n \"samples_ts\": [ 48.1253, 45.2665, 48.1262 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:14:15Z", "avg_ns": 2118129834, "stddev_ns": 6941662, "avg_ts": 241.7244, "stddev_ts": 0.791723, "samples_ns": [ 2111657979, 2117270277, 2125461246 ], "samples_ts": [ 242.464, 241.821, 240.889 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:14:23Z", "avg_ns": 10862800449, "stddev_ns": 387977400, "avg_ts": 47.17264, "stddev_ts": 1.650785, "samples_ns": [ 10638898851, 11310798116, 10638704382 ], "samples_ts": [ 48.1253, 45.2665, 48.1262 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 199 }, { "timestamp_utc": "2025-12-08T22:15:06.798537+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:14:56Z\",\n \"avg_ns\": 519585666,\n \"stddev_ns\": 2247834,\n \"avg_ts\": 246.353217,\n \"stddev_ts\": 1.068105,\n \"samples_ns\": [ 517015396, 520559353, 521182251 ],\n \"samples_ts\": [ 247.575, 245.889, 245.595 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:14:58Z\",\n \"avg_ns\": 2601167089,\n \"stddev_ns\": 6329539,\n \"avg_ts\": 49.208874,\n \"stddev_ts\": 0.119641,\n \"samples_ns\": [ 2595678302, 2599732635, 2608090332 ],\n \"samples_ts\": [ 49.3127, 49.2358, 49.0781 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:14:56Z", "avg_ns": 519585666, "stddev_ns": 2247834, "avg_ts": 246.353217, "stddev_ts": 1.068105, "samples_ns": [ 517015396, 520559353, 521182251 ], "samples_ts": [ 247.575, 245.889, 245.595 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:14:58Z", "avg_ns": 2601167089, "stddev_ns": 6329539, "avg_ts": 49.208874, "stddev_ts": 0.119641, "samples_ns": [ 2595678302, 2599732635, 2608090332 ], "samples_ts": [ 49.3127, 49.2358, 49.0781 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 200 }, { "timestamp_utc": "2025-12-08T22:15:41.428631+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:15:07Z\",\n \"avg_ns\": 521948728,\n \"stddev_ns\": 1879738,\n \"avg_ts\": 245.236931,\n \"stddev_ts\": 0.881332,\n \"samples_ns\": [ 524114303, 520990887, 520740995 ],\n \"samples_ts\": [ 244.222, 245.686, 245.804 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:15:09Z\",\n \"avg_ns\": 10611593481,\n \"stddev_ns\": 7334058,\n \"avg_ts\": 48.249131,\n \"stddev_ts\": 0.033338,\n \"samples_ns\": [ 10610332225, 10619475642, 10604972577 ],\n \"samples_ts\": [ 48.2549, 48.2133, 48.2792 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:15:07Z", "avg_ns": 521948728, "stddev_ns": 1879738, "avg_ts": 245.236931, "stddev_ts": 0.881332, "samples_ns": [ 524114303, 520990887, 520740995 ], "samples_ts": [ 244.222, 245.686, 245.804 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:15:09Z", "avg_ns": 10611593481, "stddev_ns": 7334058, "avg_ts": 48.249131, "stddev_ts": 0.033338, "samples_ns": [ 10610332225, 10619475642, 10604972577 ], "samples_ts": [ 48.2549, 48.2133, 48.2792 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 201 }, { "timestamp_utc": "2025-12-08T22:15:58.779461+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:15:42Z\",\n \"avg_ns\": 2211679055,\n \"stddev_ns\": 6361717,\n \"avg_ts\": 231.499601,\n \"stddev_ts\": 0.664818,\n \"samples_ns\": [ 2207383462, 2218987292, 2208666412 ],\n \"samples_ts\": [ 231.949, 230.736, 231.814 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:15:50Z\",\n \"avg_ns\": 2594414779,\n \"stddev_ns\": 12467587,\n \"avg_ts\": 49.337513,\n \"stddev_ts\": 0.237515,\n \"samples_ns\": [ 2604910056, 2580633231, 2597701050 ],\n \"samples_ts\": [ 49.138, 49.6002, 49.2743 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:15:42Z", "avg_ns": 2211679055, "stddev_ns": 6361717, "avg_ts": 231.499601, "stddev_ts": 0.664818, "samples_ns": [ 2207383462, 2218987292, 2208666412 ], "samples_ts": [ 231.949, 230.736, 231.814 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:15:50Z", "avg_ns": 2594414779, "stddev_ns": 12467587, "avg_ts": 49.337513, "stddev_ts": 0.237515, "samples_ns": [ 2604910056, 2580633231, 2597701050 ], "samples_ts": [ 49.138, 49.6002, 49.2743 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 202 }, { "timestamp_utc": "2025-12-08T22:16:40.050929+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:15:59Z\",\n \"avg_ns\": 2211679210,\n \"stddev_ns\": 2101388,\n \"avg_ts\": 231.498449,\n \"stddev_ts\": 0.219961,\n \"samples_ns\": [ 2213070730, 2209263228, 2212703674 ],\n \"samples_ts\": [ 231.353, 231.751, 231.391 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:16:08Z\",\n \"avg_ns\": 10573793050,\n \"stddev_ns\": 22481223,\n \"avg_ts\": 48.421748,\n \"stddev_ts\": 0.102837,\n \"samples_ns\": [ 10599409998, 10557349573, 10564619581 ],\n \"samples_ts\": [ 48.3046, 48.497, 48.4636 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:15:59Z", "avg_ns": 2211679210, "stddev_ns": 2101388, "avg_ts": 231.498449, "stddev_ts": 0.219961, "samples_ns": [ 2213070730, 2209263228, 2212703674 ], "samples_ts": [ 231.353, 231.751, 231.391 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:16:08Z", "avg_ns": 10573793050, "stddev_ns": 22481223, "avg_ts": 48.421748, "stddev_ts": 0.102837, "samples_ns": [ 10599409998, 10557349573, 10564619581 ], "samples_ts": [ 48.3046, 48.497, 48.4636 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 203 }, { "timestamp_utc": "2025-12-08T22:16:50.632493+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:16:40Z\",\n \"avg_ns\": 519590120,\n \"stddev_ns\": 487956,\n \"avg_ts\": 246.348170,\n \"stddev_ts\": 0.231346,\n \"samples_ns\": [ 519105587, 519583343, 520081430 ],\n \"samples_ts\": [ 246.578, 246.351, 246.115 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:16:42Z\",\n \"avg_ns\": 2605729000,\n \"stddev_ns\": 7665492,\n \"avg_ts\": 49.122813,\n \"stddev_ts\": 0.144737,\n \"samples_ns\": [ 2609361272, 2610902823, 2596922907 ],\n \"samples_ts\": [ 49.0542, 49.0252, 49.2891 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:16:40Z", "avg_ns": 519590120, "stddev_ns": 487956, "avg_ts": 246.34817, "stddev_ts": 0.231346, "samples_ns": [ 519105587, 519583343, 520081430 ], "samples_ts": [ 246.578, 246.351, 246.115 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:16:42Z", "avg_ns": 2605729000, "stddev_ns": 7665492, "avg_ts": 49.122813, "stddev_ts": 0.144737, "samples_ns": [ 2609361272, 2610902823, 2596922907 ], "samples_ts": [ 49.0542, 49.0252, 49.2891 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 204 }, { "timestamp_utc": "2025-12-08T22:17:26.193025+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:16:51Z\",\n \"avg_ns\": 521040784,\n \"stddev_ns\": 1976165,\n \"avg_ts\": 245.664503,\n \"stddev_ts\": 0.930378,\n \"samples_ns\": [ 523216741, 519358587, 520547026 ],\n \"samples_ts\": [ 244.64, 246.458, 245.895 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:16:53Z\",\n \"avg_ns\": 10922595012,\n \"stddev_ns\": 377927771,\n \"avg_ts\": 46.912018,\n \"stddev_ts\": 1.591469,\n \"samples_ns\": [ 10695809711, 10713101118, 11358874207 ],\n \"samples_ts\": [ 47.8692, 47.792, 45.0749 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:16:51Z", "avg_ns": 521040784, "stddev_ns": 1976165, "avg_ts": 245.664503, "stddev_ts": 0.930378, "samples_ns": [ 523216741, 519358587, 520547026 ], "samples_ts": [ 244.64, 246.458, 245.895 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:16:53Z", "avg_ns": 10922595012, "stddev_ns": 377927771, "avg_ts": 46.912018, "stddev_ts": 1.591469, "samples_ns": [ 10695809711, 10713101118, 11358874207 ], "samples_ts": [ 47.8692, 47.792, 45.0749 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 205 }, { "timestamp_utc": "2025-12-08T22:17:43.219282+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:17:26Z\",\n \"avg_ns\": 2122300457,\n \"stddev_ns\": 6635096,\n \"avg_ts\": 241.249222,\n \"stddev_ts\": 0.753607,\n \"samples_ns\": [ 2129462608, 2116363071, 2121075692 ],\n \"samples_ts\": [ 240.436, 241.924, 241.387 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:17:35Z\",\n \"avg_ns\": 2606926486,\n \"stddev_ns\": 25327438,\n \"avg_ts\": 49.103066,\n \"stddev_ts\": 0.478707,\n \"samples_ns\": [ 2613268309, 2628480257, 2579030894 ],\n \"samples_ts\": [ 48.9808, 48.6973, 49.631 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:17:26Z", "avg_ns": 2122300457, "stddev_ns": 6635096, "avg_ts": 241.249222, "stddev_ts": 0.753607, "samples_ns": [ 2129462608, 2116363071, 2121075692 ], "samples_ts": [ 240.436, 241.924, 241.387 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:17:35Z", "avg_ns": 2606926486, "stddev_ns": 25327438, "avg_ts": 49.103066, "stddev_ts": 0.478707, "samples_ns": [ 2613268309, 2628480257, 2579030894 ], "samples_ts": [ 48.9808, 48.6973, 49.631 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 206 }, { "timestamp_utc": "2025-12-08T22:18:26.332478+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:17:43Z\",\n \"avg_ns\": 2118175516,\n \"stddev_ns\": 3691010,\n \"avg_ts\": 241.717947,\n \"stddev_ts\": 0.421367,\n \"samples_ns\": [ 2114150180, 2118977061, 2121399309 ],\n \"samples_ts\": [ 242.178, 241.626, 241.35 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:17:52Z\",\n \"avg_ns\": 11313086075,\n \"stddev_ns\": 244178282,\n \"avg_ts\": 45.271211,\n \"stddev_ts\": 0.965100,\n \"samples_ns\": [ 11595024274, 11174599221, 11169634731 ],\n \"samples_ts\": [ 44.1569, 45.8182, 45.8386 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:17:43Z", "avg_ns": 2118175516, "stddev_ns": 3691010, "avg_ts": 241.717947, "stddev_ts": 0.421367, "samples_ns": [ 2114150180, 2118977061, 2121399309 ], "samples_ts": [ 242.178, 241.626, 241.35 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:17:52Z", "avg_ns": 11313086075, "stddev_ns": 244178282, "avg_ts": 45.271211, "stddev_ts": 0.9651, "samples_ns": [ 11595024274, 11174599221, 11169634731 ], "samples_ts": [ 44.1569, 45.8182, 45.8386 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 207 }, { "timestamp_utc": "2025-12-08T22:18:37.582089+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:18:26Z\",\n \"avg_ns\": 520432646,\n \"stddev_ns\": 1028523,\n \"avg_ts\": 245.949854,\n \"stddev_ts\": 0.486374,\n \"samples_ns\": [ 520966263, 519247547, 521084130 ],\n \"samples_ts\": [ 245.697, 246.511, 245.642 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:18:29Z\",\n \"avg_ns\": 2819245159,\n \"stddev_ns\": 348063139,\n \"avg_ts\": 45.837106,\n \"stddev_ts\": 5.283778,\n \"samples_ns\": [ 2609479430, 2627232919, 3221023128 ],\n \"samples_ts\": [ 49.0519, 48.7205, 39.7389 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:18:26Z", "avg_ns": 520432646, "stddev_ns": 1028523, "avg_ts": 245.949854, "stddev_ts": 0.486374, "samples_ns": [ 520966263, 519247547, 521084130 ], "samples_ts": [ 245.697, 246.511, 245.642 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:18:29Z", "avg_ns": 2819245159, "stddev_ns": 348063139, "avg_ts": 45.837106, "stddev_ts": 5.283778, "samples_ns": [ 2609479430, 2627232919, 3221023128 ], "samples_ts": [ 49.0519, 48.7205, 39.7389 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 208 }, { "timestamp_utc": "2025-12-08T22:19:12.899427+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:18:38Z\",\n \"avg_ns\": 521173366,\n \"stddev_ns\": 1097741,\n \"avg_ts\": 245.600384,\n \"stddev_ts\": 0.517491,\n \"samples_ns\": [ 522203057, 520018334, 521298707 ],\n \"samples_ts\": [ 245.115, 246.145, 245.541 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:18:40Z\",\n \"avg_ns\": 10838908470,\n \"stddev_ns\": 343902387,\n \"avg_ts\": 47.268440,\n \"stddev_ts\": 1.475605,\n \"samples_ns\": [ 10694411638, 11231483814, 10590829959 ],\n \"samples_ts\": [ 47.8755, 45.5861, 48.3437 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:18:38Z", "avg_ns": 521173366, "stddev_ns": 1097741, "avg_ts": 245.600384, "stddev_ts": 0.517491, "samples_ns": [ 522203057, 520018334, 521298707 ], "samples_ts": [ 245.115, 246.145, 245.541 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:18:40Z", "avg_ns": 10838908470, "stddev_ns": 343902387, "avg_ts": 47.26844, "stddev_ts": 1.475605, "samples_ns": [ 10694411638, 11231483814, 10590829959 ], "samples_ts": [ 47.8755, 45.5861, 48.3437 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 209 }, { "timestamp_utc": "2025-12-08T22:19:30.335289+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:19:13Z\",\n \"avg_ns\": 2276860130,\n \"stddev_ns\": 263800074,\n \"avg_ts\": 226.773357,\n \"stddev_ts\": 24.627891,\n \"samples_ns\": [ 2127399367, 2121728413, 2581452612 ],\n \"samples_ts\": [ 240.669, 241.313, 198.338 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:19:22Z\",\n \"avg_ns\": 2588765700,\n \"stddev_ns\": 4099829,\n \"avg_ts\": 49.444496,\n \"stddev_ts\": 0.078360,\n \"samples_ns\": [ 2590328620, 2591853863, 2584114618 ],\n \"samples_ts\": [ 49.4146, 49.3855, 49.5334 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:19:13Z", "avg_ns": 2276860130, "stddev_ns": 263800074, "avg_ts": 226.773357, "stddev_ts": 24.627891, "samples_ns": [ 2127399367, 2121728413, 2581452612 ], "samples_ts": [ 240.669, 241.313, 198.338 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:19:22Z", "avg_ns": 2588765700, "stddev_ns": 4099829, "avg_ts": 49.444496, "stddev_ts": 0.07836, "samples_ns": [ 2590328620, 2591853863, 2584114618 ], "samples_ts": [ 49.4146, 49.3855, 49.5334 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 210 }, { "timestamp_utc": "2025-12-08T22:20:12.911529+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:19:30Z\",\n \"avg_ns\": 2254415146,\n \"stddev_ns\": 243509175,\n \"avg_ts\": 228.785638,\n \"stddev_ts\": 23.271666,\n \"samples_ns\": [ 2535402519, 2104905318, 2122937601 ],\n \"samples_ts\": [ 201.94, 243.241, 241.175 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:19:39Z\",\n \"avg_ns\": 10983064562,\n \"stddev_ns\": 370336481,\n \"avg_ts\": 46.653289,\n \"stddev_ts\": 1.604308,\n \"samples_ns\": [ 11201459577, 10555469801, 11192264309 ],\n \"samples_ts\": [ 45.7083, 48.5057, 45.7459 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:19:30Z", "avg_ns": 2254415146, "stddev_ns": 243509175, "avg_ts": 228.785638, "stddev_ts": 23.271666, "samples_ns": [ 2535402519, 2104905318, 2122937601 ], "samples_ts": [ 201.94, 243.241, 241.175 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:19:39Z", "avg_ns": 10983064562, "stddev_ns": 370336481, "avg_ts": 46.653289, "stddev_ts": 1.604308, "samples_ns": [ 11201459577, 10555469801, 11192264309 ], "samples_ts": [ 45.7083, 48.5057, 45.7459 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 211 }, { "timestamp_utc": "2025-12-08T22:20:23.533660+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:20:13Z\",\n \"avg_ns\": 523238470,\n \"stddev_ns\": 1599980,\n \"avg_ts\": 244.631852,\n \"stddev_ts\": 0.746707,\n \"samples_ns\": [ 522564722, 522085903, 525064787 ],\n \"samples_ts\": [ 244.946, 245.17, 243.779 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:20:15Z\",\n \"avg_ns\": 2607326923,\n \"stddev_ns\": 7854488,\n \"avg_ts\": 49.092722,\n \"stddev_ts\": 0.148080,\n \"samples_ns\": [ 2598535279, 2609793178, 2613652312 ],\n \"samples_ts\": [ 49.2585, 49.046, 48.9736 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:20:13Z", "avg_ns": 523238470, "stddev_ns": 1599980, "avg_ts": 244.631852, "stddev_ts": 0.746707, "samples_ns": [ 522564722, 522085903, 525064787 ], "samples_ts": [ 244.946, 245.17, 243.779 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:20:15Z", "avg_ns": 2607326923, "stddev_ns": 7854488, "avg_ts": 49.092722, "stddev_ts": 0.14808, "samples_ns": [ 2598535279, 2609793178, 2613652312 ], "samples_ts": [ 49.2585, 49.046, 48.9736 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 212 }, { "timestamp_utc": "2025-12-08T22:20:58.012928+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:20:24Z\",\n \"avg_ns\": 521918368,\n \"stddev_ns\": 945938,\n \"avg_ts\": 245.249617,\n \"stddev_ts\": 0.444491,\n \"samples_ns\": [ 520886156, 522126809, 522742141 ],\n \"samples_ts\": [ 245.735, 245.151, 244.863 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:20:26Z\",\n \"avg_ns\": 10558176416,\n \"stddev_ns\": 38854732,\n \"avg_ts\": 48.493660,\n \"stddev_ts\": 0.178415,\n \"samples_ns\": [ 10556352584, 10597910947, 10520265717 ],\n \"samples_ts\": [ 48.5016, 48.3114, 48.668 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:20:24Z", "avg_ns": 521918368, "stddev_ns": 945938, "avg_ts": 245.249617, "stddev_ts": 0.444491, "samples_ns": [ 520886156, 522126809, 522742141 ], "samples_ts": [ 245.735, 245.151, 244.863 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:20:26Z", "avg_ns": 10558176416, "stddev_ns": 38854732, "avg_ts": 48.49366, "stddev_ts": 0.178415, "samples_ns": [ 10556352584, 10597910947, 10520265717 ], "samples_ts": [ 48.5016, 48.3114, 48.668 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 213 }, { "timestamp_utc": "2025-12-08T22:21:15.297095+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:20:58Z\",\n \"avg_ns\": 2213759326,\n \"stddev_ns\": 7364574,\n \"avg_ts\": 231.282496,\n \"stddev_ts\": 0.770434,\n \"samples_ns\": [ 2219795553, 2215928478, 2205553948 ],\n \"samples_ts\": [ 230.652, 231.054, 232.141 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:21:07Z\",\n \"avg_ns\": 2572220260,\n \"stddev_ns\": 15998016,\n \"avg_ts\": 49.763736,\n \"stddev_ts\": 0.308437,\n \"samples_ns\": [ 2590621809, 2564423486, 2561615486 ],\n \"samples_ts\": [ 49.409, 49.9138, 49.9685 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:20:58Z", "avg_ns": 2213759326, "stddev_ns": 7364574, "avg_ts": 231.282496, "stddev_ts": 0.770434, "samples_ns": [ 2219795553, 2215928478, 2205553948 ], "samples_ts": [ 230.652, 231.054, 232.141 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:21:07Z", "avg_ns": 2572220260, "stddev_ns": 15998016, "avg_ts": 49.763736, "stddev_ts": 0.308437, "samples_ns": [ 2590621809, 2564423486, 2561615486 ], "samples_ts": [ 49.409, 49.9138, 49.9685 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 214 }, { "timestamp_utc": "2025-12-08T22:21:56.599048+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:21:15Z\",\n \"avg_ns\": 2208598319,\n \"stddev_ns\": 2841298,\n \"avg_ts\": 231.821495,\n \"stddev_ts\": 0.298194,\n \"samples_ns\": [ 2211431587, 2208613543, 2205749828 ],\n \"samples_ts\": [ 231.524, 231.82, 232.121 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:21:24Z\",\n \"avg_ns\": 10581687672,\n \"stddev_ns\": 35920536,\n \"avg_ts\": 48.385849,\n \"stddev_ts\": 0.164573,\n \"samples_ns\": [ 10540213585, 10601966460, 10602882971 ],\n \"samples_ts\": [ 48.5759, 48.2929, 48.2888 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:21:15Z", "avg_ns": 2208598319, "stddev_ns": 2841298, "avg_ts": 231.821495, "stddev_ts": 0.298194, "samples_ns": [ 2211431587, 2208613543, 2205749828 ], "samples_ts": [ 231.524, 231.82, 232.121 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_type": "gemma3 270M Q2_K - Medium", "model_size": 230552064, "model_n_params": 268098176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:21:24Z", "avg_ns": 10581687672, "stddev_ns": 35920536, "avg_ts": 48.385849, "stddev_ts": 0.164573, "samples_ns": [ 10540213585, 10601966460, 10602882971 ], "samples_ts": [ 48.5759, 48.2929, 48.2888 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-270M-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 215 }, { "timestamp_utc": "2025-12-08T22:23:21.495932+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:22:02Z\",\n \"avg_ns\": 6893012133,\n \"stddev_ns\": 323399123,\n \"avg_ts\": 18.596101,\n \"stddev_ts\": 0.849490,\n \"samples_ns\": [ 6711480683, 7266393727, 6701161991 ],\n \"samples_ts\": [ 19.0718, 17.6153, 19.1012 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:22:29Z\",\n \"avg_ns\": 17101803483,\n \"stddev_ns\": 307608301,\n \"avg_ts\": 7.486189,\n \"stddev_ts\": 0.133317,\n \"samples_ns\": [ 17455657011, 16898163547, 16951589893 ],\n \"samples_ts\": [ 7.33287, 7.57479, 7.55091 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:22:02Z", "avg_ns": 6893012133, "stddev_ns": 323399123, "avg_ts": 18.596101, "stddev_ts": 0.84949, "samples_ns": [ 6711480683, 7266393727, 6701161991 ], "samples_ts": [ 19.0718, 17.6153, 19.1012 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:22:29Z", "avg_ns": 17101803483, "stddev_ns": 307608301, "avg_ts": 7.486189, "stddev_ts": 0.133317, "samples_ns": [ 17455657011, 16898163547, 16951589893 ], "samples_ts": [ 7.33287, 7.57479, 7.55091 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 216 }, { "timestamp_utc": "2025-12-08T22:27:17.278660+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:23:22Z\",\n \"avg_ns\": 6916308559,\n \"stddev_ns\": 306180164,\n \"avg_ts\": 18.530590,\n \"stddev_ts\": 0.799919,\n \"samples_ns\": [ 6744689190, 7269805350, 6734431137 ],\n \"samples_ts\": [ 18.9779, 17.6071, 19.0068 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:23:49Z\",\n \"avg_ns\": 69102441542,\n \"stddev_ns\": 993948940,\n \"avg_ts\": 7.410307,\n \"stddev_ts\": 0.106116,\n \"samples_ns\": [ 68883735669, 70187530183, 68236058776 ],\n \"samples_ts\": [ 7.43281, 7.29474, 7.50336 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:23:22Z", "avg_ns": 6916308559, "stddev_ns": 306180164, "avg_ts": 18.53059, "stddev_ts": 0.799919, "samples_ns": [ 6744689190, 7269805350, 6734431137 ], "samples_ts": [ 18.9779, 17.6071, 19.0068 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:23:49Z", "avg_ns": 69102441542, "stddev_ns": 993948940, "avg_ts": 7.410307, "stddev_ts": 0.106116, "samples_ns": [ 68883735669, 70187530183, 68236058776 ], "samples_ts": [ 7.43281, 7.29474, 7.50336 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 217 }, { "timestamp_utc": "2025-12-08T22:30:01.031337+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:27:18Z\",\n \"avg_ns\": 27785217750,\n \"stddev_ns\": 313141080,\n \"avg_ts\": 18.428635,\n \"stddev_ts\": 0.209052,\n \"samples_ns\": [ 27963896631, 27423641880, 27968114741 ],\n \"samples_ts\": [ 18.3093, 18.67, 18.3066 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:29:09Z\",\n \"avg_ns\": 17077206479,\n \"stddev_ns\": 384374519,\n \"avg_ts\": 7.497874,\n \"stddev_ts\": 0.166770,\n \"samples_ns\": [ 16907882358, 16806564757, 17517172322 ],\n \"samples_ts\": [ 7.57043, 7.61607, 7.30712 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:27:18Z", "avg_ns": 27785217750, "stddev_ns": 313141080, "avg_ts": 18.428635, "stddev_ts": 0.209052, "samples_ns": [ 27963896631, 27423641880, 27968114741 ], "samples_ts": [ 18.3093, 18.67, 18.3066 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:29:09Z", "avg_ns": 17077206479, "stddev_ns": 384374519, "avg_ts": 7.497874, "stddev_ts": 0.16677, "samples_ns": [ 16907882358, 16806564757, 17517172322 ], "samples_ts": [ 7.57043, 7.61607, 7.30712 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 218 }, { "timestamp_utc": "2025-12-08T22:35:19.336992+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:30:02Z\",\n \"avg_ns\": 27170047853,\n \"stddev_ns\": 166832726,\n \"avg_ts\": 18.844756,\n \"stddev_ts\": 0.116069,\n \"samples_ns\": [ 27235668478, 27294092916, 26980382165 ],\n \"samples_ts\": [ 18.7989, 18.7586, 18.9768 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:31:50Z\",\n \"avg_ns\": 69491803158,\n \"stddev_ns\": 694416459,\n \"avg_ts\": 7.368263,\n \"stddev_ts\": 0.073233,\n \"samples_ns\": [ 70288088054, 69175280896, 69012040526 ],\n \"samples_ts\": [ 7.28431, 7.40149, 7.419 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:30:02Z", "avg_ns": 27170047853, "stddev_ns": 166832726, "avg_ts": 18.844756, "stddev_ts": 0.116069, "samples_ns": [ 27235668478, 27294092916, 26980382165 ], "samples_ts": [ 18.7989, 18.7586, 18.9768 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:31:50Z", "avg_ns": 69491803158, "stddev_ns": 694416459, "avg_ts": 7.368263, "stddev_ts": 0.073233, "samples_ns": [ 70288088054, 69175280896, 69012040526 ], "samples_ts": [ 7.28431, 7.40149, 7.419 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 219 }, { "timestamp_utc": "2025-12-08T22:36:39.219347+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:35:20Z\",\n \"avg_ns\": 6714586702,\n \"stddev_ns\": 24869360,\n \"avg_ts\": 19.063150,\n \"stddev_ts\": 0.070582,\n \"samples_ns\": [ 6690519798, 6713052796, 6740187512 ],\n \"samples_ts\": [ 19.1315, 19.0673, 18.9906 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:35:47Z\",\n \"avg_ns\": 17172625603,\n \"stddev_ns\": 3133468105,\n \"avg_ts\": 7.463513,\n \"stddev_ts\": 0.326842,\n \"samples_ns\": [ 18063483191, 16721730433, 16732663186 ],\n \"samples_ts\": [ 7.08612, 7.65471, 7.64971 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:35:20Z", "avg_ns": 6714586702, "stddev_ns": 24869360, "avg_ts": 19.06315, "stddev_ts": 0.070582, "samples_ns": [ 6690519798, 6713052796, 6740187512 ], "samples_ts": [ 19.1315, 19.0673, 18.9906 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:35:47Z", "avg_ns": 17172625603, "stddev_ns": 3133468105, "avg_ts": 7.463513, "stddev_ts": 0.326842, "samples_ns": [ 18063483191, 16721730433, 16732663186 ], "samples_ts": [ 7.08612, 7.65471, 7.64971 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 220 }, { "timestamp_utc": "2025-12-08T22:40:33.659742+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:36:39Z\",\n \"avg_ns\": 6741322625,\n \"stddev_ns\": 7244026,\n \"avg_ts\": 18.987387,\n \"stddev_ts\": 0.020391,\n \"samples_ns\": [ 6736521702, 6737791032, 6749655141 ],\n \"samples_ts\": [ 19.0009, 18.9973, 18.9639 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:37:06Z\",\n \"avg_ns\": 68840403239,\n \"stddev_ns\": 330388214,\n \"avg_ts\": 7.437607,\n \"stddev_ts\": 0.035793,\n \"samples_ns\": [ 69007566663, 69053804127, 68459838927 ],\n \"samples_ts\": [ 7.41948, 7.41451, 7.47884 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:36:39Z", "avg_ns": 6741322625, "stddev_ns": 7244026, "avg_ts": 18.987387, "stddev_ts": 0.020391, "samples_ns": [ 6736521702, 6737791032, 6749655141 ], "samples_ts": [ 19.0009, 18.9973, 18.9639 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:37:06Z", "avg_ns": 68840403239, "stddev_ns": 330388214, "avg_ts": 7.437607, "stddev_ts": 0.035793, "samples_ns": [ 69007566663, 69053804127, 68459838927 ], "samples_ts": [ 7.41948, 7.41451, 7.47884 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 221 }, { "timestamp_utc": "2025-12-08T22:43:19.926901+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:40:34Z\",\n \"avg_ns\": 27463709398,\n \"stddev_ns\": 320556632,\n \"avg_ts\": 18.644466,\n \"stddev_ts\": 0.216164,\n \"samples_ns\": [ 27272874827, 27284457566, 27833795803 ],\n \"samples_ts\": [ 18.7732, 18.7653, 18.3949 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:42:25Z\",\n \"avg_ns\": 18146722237,\n \"stddev_ns\": 343355140,\n \"avg_ts\": 7.055317,\n \"stddev_ts\": 0.134890,\n \"samples_ns\": [ 18381543012, 18305965279, 17752658422 ],\n \"samples_ts\": [ 6.96351, 6.99226, 7.21019 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:40:34Z", "avg_ns": 27463709398, "stddev_ns": 320556632, "avg_ts": 18.644466, "stddev_ts": 0.216164, "samples_ns": [ 27272874827, 27284457566, 27833795803 ], "samples_ts": [ 18.7732, 18.7653, 18.3949 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:42:25Z", "avg_ns": 18146722237, "stddev_ns": 343355140, "avg_ts": 7.055317, "stddev_ts": 0.13489, "samples_ns": [ 18381543012, 18305965279, 17752658422 ], "samples_ts": [ 6.96351, 6.99226, 7.21019 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 222 }, { "timestamp_utc": "2025-12-08T22:48:36.854705+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:43:20Z\",\n \"avg_ns\": 27710104300,\n \"stddev_ns\": 546211649,\n \"avg_ts\": 18.481817,\n \"stddev_ts\": 0.365478,\n \"samples_ns\": [ 28225878851, 27766588820, 27137845229 ],\n \"samples_ts\": [ 18.1394, 18.4394, 18.8666 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:45:11Z\",\n \"avg_ns\": 68531446819,\n \"stddev_ns\": 953620468,\n \"avg_ts\": 7.471984,\n \"stddev_ts\": 0.103673,\n \"samples_ns\": [ 69547021694, 67655084174, 68392234590 ],\n \"samples_ts\": [ 7.36193, 7.5678, 7.48623 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:43:20Z", "avg_ns": 27710104300, "stddev_ns": 546211649, "avg_ts": 18.481817, "stddev_ts": 0.365478, "samples_ns": [ 28225878851, 27766588820, 27137845229 ], "samples_ts": [ 18.1394, 18.4394, 18.8666 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:45:11Z", "avg_ns": 68531446819, "stddev_ns": 953620468, "avg_ts": 7.471984, "stddev_ts": 0.103673, "samples_ns": [ 69547021694, 67655084174, 68392234590 ], "samples_ts": [ 7.36193, 7.5678, 7.48623 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 223 }, { "timestamp_utc": "2025-12-08T22:49:56.190269+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:48:37Z\",\n \"avg_ns\": 6716042022,\n \"stddev_ns\": 9613332,\n \"avg_ts\": 19.058870,\n \"stddev_ts\": 0.027276,\n \"samples_ns\": [ 6706812264, 6715316037, 6725997765 ],\n \"samples_ts\": [ 19.0851, 19.0609, 19.0306 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:49:04Z\",\n \"avg_ns\": 17166911214,\n \"stddev_ns\": 399149493,\n \"avg_ts\": 7.458928,\n \"stddev_ts\": 0.175722,\n \"samples_ns\": [ 17364599882, 16707498334, 17428635428 ],\n \"samples_ts\": [ 7.37132, 7.66123, 7.34424 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:48:37Z", "avg_ns": 6716042022, "stddev_ns": 9613332, "avg_ts": 19.05887, "stddev_ts": 0.027276, "samples_ns": [ 6706812264, 6715316037, 6725997765 ], "samples_ts": [ 19.0851, 19.0609, 19.0306 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:49:04Z", "avg_ns": 17166911214, "stddev_ns": 399149493, "avg_ts": 7.458928, "stddev_ts": 0.175722, "samples_ns": [ 17364599882, 16707498334, 17428635428 ], "samples_ts": [ 7.37132, 7.66123, 7.34424 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 224 }, { "timestamp_utc": "2025-12-08T22:54:05.072244+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:49:56Z\",\n \"avg_ns\": 6893188321,\n \"stddev_ns\": 305708366,\n \"avg_ts\": 18.592827,\n \"stddev_ts\": 0.803994,\n \"samples_ns\": [ 6717714334, 6715662707, 7246187924 ],\n \"samples_ts\": [ 19.0541, 19.0599, 17.6645 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:50:24Z\",\n \"avg_ns\": 73317992090,\n \"stddev_ns\": 467279603,\n \"avg_ts\": 6.983468,\n \"stddev_ts\": 0.044365,\n \"samples_ns\": [ 73130114978, 73849968002, 72973893292 ],\n \"samples_ts\": [ 7.00122, 6.93298, 7.01621 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:49:56Z", "avg_ns": 6893188321, "stddev_ns": 305708366, "avg_ts": 18.592827, "stddev_ts": 0.803994, "samples_ns": [ 6717714334, 6715662707, 7246187924 ], "samples_ts": [ 19.0541, 19.0599, 17.6645 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:50:24Z", "avg_ns": 73317992090, "stddev_ns": 467279603, "avg_ts": 6.983468, "stddev_ts": 0.044365, "samples_ns": [ 73130114978, 73849968002, 72973893292 ], "samples_ts": [ 7.00122, 6.93298, 7.01621 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 225 }, { "timestamp_utc": "2025-12-08T22:56:49.027808+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:54:05Z\",\n \"avg_ns\": 28126258388,\n \"stddev_ns\": 343987058,\n \"avg_ts\": 18.205433,\n \"stddev_ts\": 0.221131,\n \"samples_ns\": [ 28522401415, 27953288021, 27903085730 ],\n \"samples_ts\": [ 17.9508, 18.3163, 18.3492 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:55:58Z\",\n \"avg_ns\": 16852767552,\n \"stddev_ns\": 48133980,\n \"avg_ts\": 7.595233,\n \"stddev_ts\": 0.021676,\n \"samples_ns\": [ 16809716410, 16904736720, 16843849527 ],\n \"samples_ts\": [ 7.61464, 7.57184, 7.59921 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:54:05Z", "avg_ns": 28126258388, "stddev_ns": 343987058, "avg_ts": 18.205433, "stddev_ts": 0.221131, "samples_ns": [ 28522401415, 27953288021, 27903085730 ], "samples_ts": [ 17.9508, 18.3163, 18.3492 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T22:55:58Z", "avg_ns": 16852767552, "stddev_ns": 48133980, "avg_ts": 7.595233, "stddev_ts": 0.021676, "samples_ns": [ 16809716410, 16904736720, 16843849527 ], "samples_ts": [ 7.61464, 7.57184, 7.59921 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 226 }, { "timestamp_utc": "2025-12-08T23:02:09.840267+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:56:49Z\",\n \"avg_ns\": 27828748130,\n \"stddev_ns\": 629402824,\n \"avg_ts\": 18.404436,\n \"stddev_ts\": 0.410895,\n \"samples_ns\": [ 28555407972, 27454371913, 27476464505 ],\n \"samples_ts\": [ 17.9301, 18.6491, 18.6341 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:58:41Z\",\n \"avg_ns\": 69459497698,\n \"stddev_ns\": 596057671,\n \"avg_ts\": 7.371565,\n \"stddev_ts\": 0.063438,\n \"samples_ns\": [ 69974763117, 69597036901, 68806693077 ],\n \"samples_ts\": [ 7.31692, 7.35664, 7.44114 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T22:56:49Z", "avg_ns": 27828748130, "stddev_ns": 629402824, "avg_ts": 18.404436, "stddev_ts": 0.410895, "samples_ns": [ 28555407972, 27454371913, 27476464505 ], "samples_ts": [ 17.9301, 18.6491, 18.6341 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T22:58:41Z", "avg_ns": 69459497698, "stddev_ns": 596057671, "avg_ts": 7.371565, "stddev_ts": 0.063438, "samples_ns": [ 69974763117, 69597036901, 68806693077 ], "samples_ts": [ 7.31692, 7.35664, 7.44114 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 227 }, { "timestamp_utc": "2025-12-08T23:03:28.864381+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:02:10Z\",\n \"avg_ns\": 6716107213,\n \"stddev_ns\": 8397416,\n \"avg_ts\": 19.058679,\n \"stddev_ts\": 0.023844,\n \"samples_ns\": [ 6720337750, 6721547082, 6706436809 ],\n \"samples_ts\": [ 19.0467, 19.0432, 19.0861 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:02:37Z\",\n \"avg_ns\": 17052852439,\n \"stddev_ns\": 346622209,\n \"avg_ts\": 7.508129,\n \"stddev_ts\": 0.151568,\n \"samples_ns\": [ 16967828222, 17434075463, 16756653634 ],\n \"samples_ts\": [ 7.54369, 7.34194, 7.63876 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T23:02:10Z", "avg_ns": 6716107213, "stddev_ns": 8397416, "avg_ts": 19.058679, "stddev_ts": 0.023844, "samples_ns": [ 6720337750, 6721547082, 6706436809 ], "samples_ts": [ 19.0467, 19.0432, 19.0861 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T23:02:37Z", "avg_ns": 17052852439, "stddev_ns": 346622209, "avg_ts": 7.508129, "stddev_ts": 0.151568, "samples_ns": [ 16967828222, 17434075463, 16756653634 ], "samples_ts": [ 7.54369, 7.34194, 7.63876 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 228 }, { "timestamp_utc": "2025-12-08T23:07:24.735592+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:03:29Z\",\n \"avg_ns\": 6923738974,\n \"stddev_ns\": 322076063,\n \"avg_ts\": 18.513132,\n \"stddev_ts\": 0.838742,\n \"samples_ns\": [ 6729149802, 6746562662, 7295504459 ],\n \"samples_ts\": [ 19.0217, 18.9726, 17.5451 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:03:57Z\",\n \"avg_ns\": 69028769105,\n \"stddev_ns\": 556584700,\n \"avg_ts\": 7.417518,\n \"stddev_ts\": 0.059739,\n \"samples_ns\": [ 68501671379, 68973864644, 69610771292 ],\n \"samples_ts\": [ 7.47427, 7.4231, 7.35518 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T23:03:29Z", "avg_ns": 6923738974, "stddev_ns": 322076063, "avg_ts": 18.513132, "stddev_ts": 0.838742, "samples_ns": [ 6729149802, 6746562662, 7295504459 ], "samples_ts": [ 19.0217, 18.9726, 17.5451 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T23:03:57Z", "avg_ns": 69028769105, "stddev_ns": 556584700, "avg_ts": 7.417518, "stddev_ts": 0.059739, "samples_ns": [ 68501671379, 68973864644, 69610771292 ], "samples_ts": [ 7.47427, 7.4231, 7.35518 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 229 }, { "timestamp_utc": "2025-12-08T23:10:05.368875+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:07:25Z\",\n \"avg_ns\": 27190962773,\n \"stddev_ns\": 279858033,\n \"avg_ts\": 18.831108,\n \"stddev_ts\": 0.192671,\n \"samples_ns\": [ 27029733048, 27029040637, 27514114636 ],\n \"samples_ts\": [ 18.9421, 18.9426, 18.6086 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:09:14Z\",\n \"avg_ns\": 17020891730,\n \"stddev_ns\": 324985457,\n \"avg_ts\": 7.521978,\n \"stddev_ts\": 0.142054,\n \"samples_ns\": [ 16829792349, 16836751802, 17396131041 ],\n \"samples_ts\": [ 7.60556, 7.60242, 7.35796 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T23:07:25Z", "avg_ns": 27190962773, "stddev_ns": 279858033, "avg_ts": 18.831108, "stddev_ts": 0.192671, "samples_ns": [ 27029733048, 27029040637, 27514114636 ], "samples_ts": [ 18.9421, 18.9426, 18.6086 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T23:09:14Z", "avg_ns": 17020891730, "stddev_ns": 324985457, "avg_ts": 7.521978, "stddev_ts": 0.142054, "samples_ns": [ 16829792349, 16836751802, 17396131041 ], "samples_ts": [ 7.60556, 7.60242, 7.35796 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 230 }, { "timestamp_utc": "2025-12-08T23:15:21.262227+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:10:06Z\",\n \"avg_ns\": 27331237871,\n \"stddev_ns\": 597762732,\n \"avg_ts\": 18.739045,\n \"stddev_ts\": 0.404736,\n \"samples_ns\": [ 26978183295, 26994116857, 28021413463 ],\n \"samples_ts\": [ 18.9783, 18.9671, 18.2717 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:11:55Z\",\n \"avg_ns\": 68649999295,\n \"stddev_ns\": 591879685,\n \"avg_ts\": 7.458489,\n \"stddev_ts\": 0.064071,\n \"samples_ns\": [ 69312623564, 68463663829, 68173710494 ],\n \"samples_ts\": [ 7.38682, 7.47842, 7.51023 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T23:10:06Z", "avg_ns": 27331237871, "stddev_ns": 597762732, "avg_ts": 18.739045, "stddev_ts": 0.404736, "samples_ns": [ 26978183295, 26994116857, 28021413463 ], "samples_ts": [ 18.9783, 18.9671, 18.2717 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T23:11:55Z", "avg_ns": 68649999295, "stddev_ns": 591879685, "avg_ts": 7.458489, "stddev_ts": 0.064071, "samples_ns": [ 69312623564, 68463663829, 68173710494 ], "samples_ts": [ 7.38682, 7.47842, 7.51023 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 231 }, { "timestamp_utc": "2025-12-08T23:16:40.164131+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:15:22Z\",\n \"avg_ns\": 6731078486,\n \"stddev_ns\": 760730,\n \"avg_ts\": 19.016269,\n \"stddev_ts\": 0.002149,\n \"samples_ns\": [ 6730239911, 6731271269, 6731724278 ],\n \"samples_ts\": [ 19.0186, 19.0157, 19.0144 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:15:48Z\",\n \"avg_ns\": 17005008765,\n \"stddev_ns\": 358554789,\n \"avg_ts\": 7.529399,\n \"stddev_ts\": 0.156856,\n \"samples_ns\": [ 16807289331, 17418895157, 16788841807 ],\n \"samples_ts\": [ 7.61574, 7.34834, 7.62411 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T23:15:22Z", "avg_ns": 6731078486, "stddev_ns": 760730, "avg_ts": 19.016269, "stddev_ts": 0.002149, "samples_ns": [ 6730239911, 6731271269, 6731724278 ], "samples_ts": [ 19.0186, 19.0157, 19.0144 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T23:15:48Z", "avg_ns": 17005008765, "stddev_ns": 358554789, "avg_ts": 7.529399, "stddev_ts": 0.156856, "samples_ns": [ 16807289331, 17418895157, 16788841807 ], "samples_ts": [ 7.61574, 7.34834, 7.62411 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 232 }, { "timestamp_utc": "2025-12-08T23:20:36.290065+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:16:41Z\",\n \"avg_ns\": 6890259634,\n \"stddev_ns\": 276482883,\n \"avg_ts\": 18.596459,\n \"stddev_ts\": 0.729333,\n \"samples_ns\": [ 7209483357, 6734512030, 6726783517 ],\n \"samples_ts\": [ 17.7544, 19.0066, 19.0284 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:17:08Z\",\n \"avg_ns\": 69069474952,\n \"stddev_ns\": 3276558287,\n \"avg_ts\": 7.414391,\n \"stddev_ts\": 0.131824,\n \"samples_ns\": [ 68994455890, 67878879761, 70335089206 ],\n \"samples_ts\": [ 7.42089, 7.54285, 7.27944 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T23:16:41Z", "avg_ns": 6890259634, "stddev_ns": 276482883, "avg_ts": 18.596459, "stddev_ts": 0.729333, "samples_ns": [ 7209483357, 6734512030, 6726783517 ], "samples_ts": [ 17.7544, 19.0066, 19.0284 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T23:17:08Z", "avg_ns": 69069474952, "stddev_ns": 3276558287, "avg_ts": 7.414391, "stddev_ts": 0.131824, "samples_ns": [ 68994455890, 67878879761, 70335089206 ], "samples_ts": [ 7.42089, 7.54285, 7.27944 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 233 }, { "timestamp_utc": "2025-12-08T23:23:18.181203+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:20:37Z\",\n \"avg_ns\": 27516081635,\n \"stddev_ns\": 302051769,\n \"avg_ts\": 18.608803,\n \"stddev_ts\": 0.205264,\n \"samples_ns\": [ 27614482363, 27177099898, 27756662644 ],\n \"samples_ts\": [ 18.541, 18.8394, 18.446 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:22:26Z\",\n \"avg_ns\": 17055231949,\n \"stddev_ns\": 360095227,\n \"avg_ts\": 7.507233,\n \"stddev_ts\": 0.156651,\n \"samples_ns\": [ 16876797659, 16819196206, 17469701984 ],\n \"samples_ts\": [ 7.58438, 7.61035, 7.32697 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T23:20:37Z", "avg_ns": 27516081635, "stddev_ns": 302051769, "avg_ts": 18.608803, "stddev_ts": 0.205264, "samples_ns": [ 27614482363, 27177099898, 27756662644 ], "samples_ts": [ 18.541, 18.8394, 18.446 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T23:22:26Z", "avg_ns": 17055231949, "stddev_ns": 360095227, "avg_ts": 7.507233, "stddev_ts": 0.156651, "samples_ns": [ 16876797659, 16819196206, 17469701984 ], "samples_ts": [ 7.58438, 7.61035, 7.32697 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 234 }, { "timestamp_utc": "2025-12-08T23:28:36.475703+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:23:18Z\",\n \"avg_ns\": 28111818839,\n \"stddev_ns\": 364560679,\n \"avg_ts\": 18.215037,\n \"stddev_ts\": 0.237870,\n \"samples_ns\": [ 28273792384, 28367325546, 27694338589 ],\n \"samples_ts\": [ 18.1086, 18.0489, 18.4875 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:25:11Z\",\n \"avg_ns\": 68419080592,\n \"stddev_ns\": 260371733,\n \"avg_ts\": 7.483365,\n \"stddev_ts\": 0.028523,\n \"samples_ns\": [ 68128356450, 68498088256, 68630797072 ],\n \"samples_ts\": [ 7.51523, 7.47466, 7.46021 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T23:23:18Z", "avg_ns": 28111818839, "stddev_ns": 364560679, "avg_ts": 18.215037, "stddev_ts": 0.23787, "samples_ns": [ 28273792384, 28367325546, 27694338589 ], "samples_ts": [ 18.1086, 18.0489, 18.4875 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T23:25:11Z", "avg_ns": 68419080592, "stddev_ns": 260371733, "avg_ts": 7.483365, "stddev_ts": 0.028523, "samples_ns": [ 68128356450, 68498088256, 68630797072 ], "samples_ts": [ 7.51523, 7.47466, 7.46021 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 235 }, { "timestamp_utc": "2025-12-08T23:29:55.152989+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:28:37Z\",\n \"avg_ns\": 6737669330,\n \"stddev_ns\": 2401418,\n \"avg_ts\": 18.997669,\n \"stddev_ts\": 0.006770,\n \"samples_ns\": [ 6736227321, 6740441497, 6736339172 ],\n \"samples_ts\": [ 19.0017, 18.9899, 19.0014 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:29:04Z\",\n \"avg_ns\": 16920279799,\n \"stddev_ns\": 285204572,\n \"avg_ts\": 7.566308,\n \"stddev_ts\": 0.126442,\n \"samples_ns\": [ 16713210389, 16802041364, 17245587645 ],\n \"samples_ts\": [ 7.65861, 7.61812, 7.42219 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T23:28:37Z", "avg_ns": 6737669330, "stddev_ns": 2401418, "avg_ts": 18.997669, "stddev_ts": 0.00677, "samples_ns": [ 6736227321, 6740441497, 6736339172 ], "samples_ts": [ 19.0017, 18.9899, 19.0014 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T23:29:04Z", "avg_ns": 16920279799, "stddev_ns": 285204572, "avg_ts": 7.566308, "stddev_ts": 0.126442, "samples_ns": [ 16713210389, 16802041364, 17245587645 ], "samples_ts": [ 7.65861, 7.61812, 7.42219 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 236 }, { "timestamp_utc": "2025-12-08T23:33:50.375708+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:29:55Z\",\n \"avg_ns\": 6897622321,\n \"stddev_ns\": 313231827,\n \"avg_ts\": 18.582013,\n \"stddev_ts\": 0.822281,\n \"samples_ns\": [ 6715560682, 7259308519, 6717997764 ],\n \"samples_ts\": [ 19.0602, 17.6325, 19.0533 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:30:23Z\",\n \"avg_ns\": 68936908217,\n \"stddev_ns\": 3279456757,\n \"avg_ts\": 7.428677,\n \"stddev_ts\": 0.133390,\n \"samples_ns\": [ 68941040371, 70172360509, 67697323772 ],\n \"samples_ts\": [ 7.42664, 7.29632, 7.56308 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T23:29:55Z", "avg_ns": 6897622321, "stddev_ns": 313231827, "avg_ts": 18.582013, "stddev_ts": 0.822281, "samples_ns": [ 6715560682, 7259308519, 6717997764 ], "samples_ts": [ 19.0602, 17.6325, 19.0533 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T23:30:23Z", "avg_ns": 68936908217, "stddev_ns": 3279456757, "avg_ts": 7.428677, "stddev_ts": 0.13339, "samples_ns": [ 68941040371, 70172360509, 67697323772 ], "samples_ts": [ 7.42664, 7.29632, 7.56308 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 237 }, { "timestamp_utc": "2025-12-08T23:36:37.280751+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:33:51Z\",\n \"avg_ns\": 27973128124,\n \"stddev_ns\": 563236184,\n \"avg_ts\": 18.308202,\n \"stddev_ts\": 0.366676,\n \"samples_ns\": [ 28581945945, 27470626767, 27866811661 ],\n \"samples_ts\": [ 17.9134, 18.6381, 18.3731 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:35:43Z\",\n \"avg_ns\": 17794702865,\n \"stddev_ns\": 100207305,\n \"avg_ts\": 7.193304,\n \"stddev_ts\": 0.040408,\n \"samples_ns\": [ 17715153912, 17907247249, 17761707435 ],\n \"samples_ts\": [ 7.22545, 7.14794, 7.20651 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T23:33:51Z", "avg_ns": 27973128124, "stddev_ns": 563236184, "avg_ts": 18.308202, "stddev_ts": 0.366676, "samples_ns": [ 28581945945, 27470626767, 27866811661 ], "samples_ts": [ 17.9134, 18.6381, 18.3731 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T23:35:43Z", "avg_ns": 17794702865, "stddev_ns": 100207305, "avg_ts": 7.193304, "stddev_ts": 0.040408, "samples_ns": [ 17715153912, 17907247249, 17761707435 ], "samples_ts": [ 7.22545, 7.14794, 7.20651 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 238 }, { "timestamp_utc": "2025-12-08T23:41:58.269352+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:36:38Z\",\n \"avg_ns\": 27678753623,\n \"stddev_ns\": 321910873,\n \"avg_ts\": 18.499600,\n \"stddev_ts\": 0.213721,\n \"samples_ns\": [ 28050419045, 27497940212, 27487901613 ],\n \"samples_ts\": [ 18.2528, 18.6196, 18.6264 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:38:28Z\",\n \"avg_ns\": 69820051855,\n \"stddev_ns\": 673409950,\n \"avg_ts\": 7.333592,\n \"stddev_ts\": 0.070843,\n \"samples_ns\": [ 70454901654, 69113775839, 69891478073 ],\n \"samples_ts\": [ 7.26706, 7.40807, 7.32564 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T23:36:38Z", "avg_ns": 27678753623, "stddev_ns": 321910873, "avg_ts": 18.4996, "stddev_ts": 0.213721, "samples_ns": [ 28050419045, 27497940212, 27487901613 ], "samples_ts": [ 18.2528, 18.6196, 18.6264 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T23:38:28Z", "avg_ns": 69820051855, "stddev_ns": 673409950, "avg_ts": 7.333592, "stddev_ts": 0.070843, "samples_ns": [ 70454901654, 69113775839, 69891478073 ], "samples_ts": [ 7.26706, 7.40807, 7.32564 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 239 }, { "timestamp_utc": "2025-12-08T23:43:20.678854+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:41:59Z\",\n \"avg_ns\": 6941754814,\n \"stddev_ns\": 168373585,\n \"avg_ts\": 18.446436,\n \"stddev_ts\": 0.451139,\n \"samples_ns\": [ 7086940358, 6981146531, 6757177554 ],\n \"samples_ts\": [ 18.0614, 18.3351, 18.9428 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:42:27Z\",\n \"avg_ns\": 17765307713,\n \"stddev_ns\": 24307476,\n \"avg_ts\": 7.205063,\n \"stddev_ts\": 0.009856,\n \"samples_ns\": [ 17742187749, 17763086610, 17790648782 ],\n \"samples_ts\": [ 7.21444, 7.20595, 7.19479 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T23:41:59Z", "avg_ns": 6941754814, "stddev_ns": 168373585, "avg_ts": 18.446436, "stddev_ts": 0.451139, "samples_ns": [ 7086940358, 6981146531, 6757177554 ], "samples_ts": [ 18.0614, 18.3351, 18.9428 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T23:42:27Z", "avg_ns": 17765307713, "stddev_ns": 24307476, "avg_ts": 7.205063, "stddev_ts": 0.009856, "samples_ns": [ 17742187749, 17763086610, 17790648782 ], "samples_ts": [ 7.21444, 7.20595, 7.19479 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 240 }, { "timestamp_utc": "2025-12-08T23:47:26.319758+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:43:21Z\",\n \"avg_ns\": 6727120625,\n \"stddev_ns\": 6257989,\n \"avg_ts\": 19.027468,\n \"stddev_ts\": 0.017691,\n \"samples_ns\": [ 6734325024, 6723033749, 6724003102 ],\n \"samples_ts\": [ 19.0071, 19.039, 19.0363 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:43:48Z\",\n \"avg_ns\": 72583566777,\n \"stddev_ns\": 23872651,\n \"avg_ts\": 7.053939,\n \"stddev_ts\": 0.002320,\n \"samples_ns\": [ 72557315263, 72603965897, 72589419173 ],\n \"samples_ts\": [ 7.05649, 7.05196, 7.05337 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T23:43:21Z", "avg_ns": 6727120625, "stddev_ns": 6257989, "avg_ts": 19.027468, "stddev_ts": 0.017691, "samples_ns": [ 6734325024, 6723033749, 6724003102 ], "samples_ts": [ 19.0071, 19.039, 19.0363 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T23:43:48Z", "avg_ns": 72583566777, "stddev_ns": 23872651, "avg_ts": 7.053939, "stddev_ts": 0.00232, "samples_ns": [ 72557315263, 72603965897, 72589419173 ], "samples_ts": [ 7.05649, 7.05196, 7.05337 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 241 }, { "timestamp_utc": "2025-12-08T23:50:07.220138+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:47:27Z\",\n \"avg_ns\": 27189126685,\n \"stddev_ns\": 304625552,\n \"avg_ts\": 18.832623,\n \"stddev_ts\": 0.209668,\n \"samples_ns\": [ 26994542448, 27540189234, 27032648374 ],\n \"samples_ts\": [ 18.9668, 18.591, 18.9401 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:49:15Z\",\n \"avg_ns\": 17123985217,\n \"stddev_ns\": 414906192,\n \"avg_ts\": 7.477782,\n \"stddev_ts\": 0.178716,\n \"samples_ns\": [ 16906724447, 17602406669, 16862824537 ],\n \"samples_ts\": [ 7.57095, 7.27173, 7.59066 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T23:47:27Z", "avg_ns": 27189126685, "stddev_ns": 304625552, "avg_ts": 18.832623, "stddev_ts": 0.209668, "samples_ns": [ 26994542448, 27540189234, 27032648374 ], "samples_ts": [ 18.9668, 18.591, 18.9401 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T23:49:15Z", "avg_ns": 17123985217, "stddev_ns": 414906192, "avg_ts": 7.477782, "stddev_ts": 0.178716, "samples_ns": [ 16906724447, 17602406669, 16862824537 ], "samples_ts": [ 7.57095, 7.27173, 7.59066 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 242 }, { "timestamp_utc": "2025-12-08T23:55:24.340092+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:50:08Z\",\n \"avg_ns\": 27344645593,\n \"stddev_ns\": 326730968,\n \"avg_ts\": 18.725753,\n \"stddev_ts\": 0.225301,\n \"samples_ns\": [ 27533205534, 26967369179, 27533362066 ],\n \"samples_ts\": [ 18.5957, 18.9859, 18.5956 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:51:57Z\",\n \"avg_ns\": 69026611951,\n \"stddev_ns\": 607029909,\n \"avg_ts\": 7.417810,\n \"stddev_ts\": 0.064904,\n \"samples_ns\": [ 68687598698, 69727426230, 68664810925 ],\n \"samples_ts\": [ 7.45404, 7.34288, 7.45651 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T23:50:08Z", "avg_ns": 27344645593, "stddev_ns": 326730968, "avg_ts": 18.725753, "stddev_ts": 0.225301, "samples_ns": [ 27533205534, 26967369179, 27533362066 ], "samples_ts": [ 18.5957, 18.9859, 18.5956 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T23:51:57Z", "avg_ns": 69026611951, "stddev_ns": 607029909, "avg_ts": 7.41781, "stddev_ts": 0.064904, "samples_ns": [ 68687598698, 69727426230, 68664810925 ], "samples_ts": [ 7.45404, 7.34288, 7.45651 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 243 }, { "timestamp_utc": "2025-12-08T23:56:46.162770+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:55:25Z\",\n \"avg_ns\": 6896039558,\n \"stddev_ns\": 316363417,\n \"avg_ts\": 18.586784,\n \"stddev_ts\": 0.830692,\n \"samples_ns\": [ 7261335831, 6711203572, 6715579271 ],\n \"samples_ts\": [ 17.6276, 19.0726, 19.0602 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:55:52Z\",\n \"avg_ns\": 17800858044,\n \"stddev_ns\": 55331492,\n \"avg_ts\": 7.190711,\n \"stddev_ts\": 0.022383,\n \"samples_ns\": [ 17844109763, 17738507437, 17819956934 ],\n \"samples_ts\": [ 7.17324, 7.21594, 7.18296 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T23:55:25Z", "avg_ns": 6896039558, "stddev_ns": 316363417, "avg_ts": 18.586784, "stddev_ts": 0.830692, "samples_ns": [ 7261335831, 6711203572, 6715579271 ], "samples_ts": [ 17.6276, 19.0726, 19.0602 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-08T23:55:52Z", "avg_ns": 17800858044, "stddev_ns": 55331492, "avg_ts": 7.190711, "stddev_ts": 0.022383, "samples_ns": [ 17844109763, 17738507437, 17819956934 ], "samples_ts": [ 7.17324, 7.21594, 7.18296 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 244 }, { "timestamp_utc": "2025-12-09T00:00:53.904541+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:56:46Z\",\n \"avg_ns\": 6710021926,\n \"stddev_ns\": 8765076,\n \"avg_ts\": 19.075965,\n \"stddev_ts\": 0.024899,\n \"samples_ns\": [ 6704434522, 6720123538, 6705507719 ],\n \"samples_ts\": [ 19.0918, 19.0473, 19.0888 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:57:13Z\",\n \"avg_ns\": 73310938622,\n \"stddev_ns\": 60223795,\n \"avg_ts\": 6.983954,\n \"stddev_ts\": 0.005734,\n \"samples_ns\": [ 73380393063, 73273241653, 73279181152 ],\n \"samples_ts\": [ 6.97734, 6.98754, 6.98698 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-08T23:56:46Z", "avg_ns": 6710021926, "stddev_ns": 8765076, "avg_ts": 19.075965, "stddev_ts": 0.024899, "samples_ns": [ 6704434522, 6720123538, 6705507719 ], "samples_ts": [ 19.0918, 19.0473, 19.0888 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-08T23:57:13Z", "avg_ns": 73310938622, "stddev_ns": 60223795, "avg_ts": 6.983954, "stddev_ts": 0.005734, "samples_ns": [ 73380393063, 73273241653, 73279181152 ], "samples_ts": [ 6.97734, 6.98754, 6.98698 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 245 }, { "timestamp_utc": "2025-12-09T00:03:41.040716+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:00:54Z\",\n \"avg_ns\": 27725003129,\n \"stddev_ns\": 538284616,\n \"avg_ts\": 18.471721,\n \"stddev_ts\": 0.358147,\n \"samples_ns\": [ 28276962935, 27201524635, 27696521819 ],\n \"samples_ts\": [ 18.1066, 18.8225, 18.4861 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:02:45Z\",\n \"avg_ns\": 18384353019,\n \"stddev_ns\": 3088685363,\n \"avg_ts\": 6.966772,\n \"stddev_ts\": 0.212209,\n \"samples_ns\": [ 18976692428, 17856978381, 18319388250 ],\n \"samples_ts\": [ 6.74512, 7.16807, 6.98713 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:00:54Z", "avg_ns": 27725003129, "stddev_ns": 538284616, "avg_ts": 18.471721, "stddev_ts": 0.358147, "samples_ns": [ 28276962935, 27201524635, 27696521819 ], "samples_ts": [ 18.1066, 18.8225, 18.4861 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T00:02:45Z", "avg_ns": 18384353019, "stddev_ns": 3088685363, "avg_ts": 6.966772, "stddev_ts": 0.212209, "samples_ns": [ 18976692428, 17856978381, 18319388250 ], "samples_ts": [ 6.74512, 7.16807, 6.98713 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 246 }, { "timestamp_utc": "2025-12-09T00:09:02.935028+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:03:41Z\",\n \"avg_ns\": 28539680160,\n \"stddev_ns\": 316847669,\n \"avg_ts\": 17.941417,\n \"stddev_ts\": 0.200210,\n \"samples_ns\": [ 28786672324, 28649933026, 28182435131 ],\n \"samples_ts\": [ 17.786, 17.8709, 18.1673 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:05:36Z\",\n \"avg_ns\": 68852917916,\n \"stddev_ns\": 295252626,\n \"avg_ts\": 7.436232,\n \"stddev_ts\": 0.031839,\n \"samples_ns\": [ 69178686686, 68602969503, 68777097559 ],\n \"samples_ts\": [ 7.40112, 7.46323, 7.44434 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:03:41Z", "avg_ns": 28539680160, "stddev_ns": 316847669, "avg_ts": 17.941417, "stddev_ts": 0.20021, "samples_ns": [ 28786672324, 28649933026, 28182435131 ], "samples_ts": [ 17.786, 17.8709, 18.1673 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T00:05:36Z", "avg_ns": 68852917916, "stddev_ns": 295252626, "avg_ts": 7.436232, "stddev_ts": 0.031839, "samples_ns": [ 69178686686, 68602969503, 68777097559 ], "samples_ts": [ 7.40112, 7.46323, 7.44434 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 247 }, { "timestamp_utc": "2025-12-09T00:10:24.731314+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:09:03Z\",\n \"avg_ns\": 6746719899,\n \"stddev_ns\": 29149378,\n \"avg_ts\": 18.972418,\n \"stddev_ts\": 0.081975,\n \"samples_ns\": [ 6746922968, 6717469633, 6775767097 ],\n \"samples_ts\": [ 18.9716, 19.0548, 18.8909 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:09:30Z\",\n \"avg_ns\": 17773478952,\n \"stddev_ns\": 11093616,\n \"avg_ts\": 7.201743,\n \"stddev_ts\": 0.004493,\n \"samples_ns\": [ 17768956489, 17786117721, 17765362648 ],\n \"samples_ts\": [ 7.20357, 7.19662, 7.20503 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:09:03Z", "avg_ns": 6746719899, "stddev_ns": 29149378, "avg_ts": 18.972418, "stddev_ts": 0.081975, "samples_ns": [ 6746922968, 6717469633, 6775767097 ], "samples_ts": [ 18.9716, 19.0548, 18.8909 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T00:09:30Z", "avg_ns": 17773478952, "stddev_ns": 11093616, "avg_ts": 7.201743, "stddev_ts": 0.004493, "samples_ns": [ 17768956489, 17786117721, 17765362648 ], "samples_ts": [ 7.20357, 7.19662, 7.20503 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 248 }, { "timestamp_utc": "2025-12-09T00:14:31.511618+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:10:25Z\",\n \"avg_ns\": 6891901738,\n \"stddev_ns\": 305342452,\n \"avg_ts\": 18.596255,\n \"stddev_ts\": 0.803491,\n \"samples_ns\": [ 7244223484, 6727405068, 6704076662 ],\n \"samples_ts\": [ 17.6693, 19.0267, 19.0929 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:10:52Z\",\n \"avg_ns\": 72796480370,\n \"stddev_ns\": 771047190,\n \"avg_ts\": 7.033836,\n \"stddev_ts\": 0.074849,\n \"samples_ns\": [ 73047322481, 73410871755, 71931246874 ],\n \"samples_ts\": [ 7.00915, 6.97444, 7.11791 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:10:25Z", "avg_ns": 6891901738, "stddev_ns": 305342452, "avg_ts": 18.596255, "stddev_ts": 0.803491, "samples_ns": [ 7244223484, 6727405068, 6704076662 ], "samples_ts": [ 17.6693, 19.0267, 19.0929 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T00:10:52Z", "avg_ns": 72796480370, "stddev_ns": 771047190, "avg_ts": 7.033836, "stddev_ts": 0.074849, "samples_ns": [ 73047322481, 73410871755, 71931246874 ], "samples_ts": [ 7.00915, 6.97444, 7.11791 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 249 }, { "timestamp_utc": "2025-12-09T00:17:13.641314+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:14:32Z\",\n \"avg_ns\": 27679281469,\n \"stddev_ns\": 322730172,\n \"avg_ts\": 18.499255,\n \"stddev_ts\": 0.214255,\n \"samples_ns\": [ 28051853331, 27486113339, 27499877739 ],\n \"samples_ts\": [ 18.2519, 18.6276, 18.6183 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:16:22Z\",\n \"avg_ns\": 16850454104,\n \"stddev_ns\": 119698394,\n \"avg_ts\": 7.596489,\n \"stddev_ts\": 0.053775,\n \"samples_ns\": [ 16804359683, 16986347156, 16760655473 ],\n \"samples_ts\": [ 7.61707, 7.53546, 7.63693 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:14:32Z", "avg_ns": 27679281469, "stddev_ns": 322730172, "avg_ts": 18.499255, "stddev_ts": 0.214255, "samples_ns": [ 28051853331, 27486113339, 27499877739 ], "samples_ts": [ 18.2519, 18.6276, 18.6183 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T00:16:22Z", "avg_ns": 16850454104, "stddev_ns": 119698394, "avg_ts": 7.596489, "stddev_ts": 0.053775, "samples_ns": [ 16804359683, 16986347156, 16760655473 ], "samples_ts": [ 7.61707, 7.53546, 7.63693 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 250 }, { "timestamp_utc": "2025-12-09T00:22:36.814940+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:17:14Z\",\n \"avg_ns\": 27871515650,\n \"stddev_ns\": 645488493,\n \"avg_ts\": 18.376494,\n \"stddev_ts\": 0.420019,\n \"samples_ns\": [ 27526090745, 27472243294, 28616212911 ],\n \"samples_ts\": [ 18.6005, 18.637, 17.892 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:19:06Z\",\n \"avg_ns\": 70169104515,\n \"stddev_ns\": 1293926743,\n \"avg_ts\": 7.298324,\n \"stddev_ts\": 0.135496,\n \"samples_ns\": [ 71264212776, 70501777459, 68741323312 ],\n \"samples_ts\": [ 7.18453, 7.26223, 7.44821 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:17:14Z", "avg_ns": 27871515650, "stddev_ns": 645488493, "avg_ts": 18.376494, "stddev_ts": 0.420019, "samples_ns": [ 27526090745, 27472243294, 28616212911 ], "samples_ts": [ 18.6005, 18.637, 17.892 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T00:19:06Z", "avg_ns": 70169104515, "stddev_ns": 1293926743, "avg_ts": 7.298324, "stddev_ts": 0.135496, "samples_ns": [ 71264212776, 70501777459, 68741323312 ], "samples_ts": [ 7.18453, 7.26223, 7.44821 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 251 }, { "timestamp_utc": "2025-12-09T00:23:19.488217+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:22:37Z\",\n \"avg_ns\": 3433192864,\n \"stddev_ns\": 10666739,\n \"avg_ts\": 37.283318,\n \"stddev_ts\": 0.115657,\n \"samples_ns\": [ 3425291475, 3428960932, 3445326185 ],\n \"samples_ts\": [ 37.3691, 37.3291, 37.1518 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:22:51Z\",\n \"avg_ns\": 9341627288,\n \"stddev_ns\": 371337226,\n \"avg_ts\": 13.716248,\n \"stddev_ts\": 0.533530,\n \"samples_ns\": [ 9163941712, 9768423023, 9092517131 ],\n \"samples_ts\": [ 13.9678, 13.1034, 14.0775 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:22:37Z", "avg_ns": 3433192864, "stddev_ns": 10666739, "avg_ts": 37.283318, "stddev_ts": 0.115657, "samples_ns": [ 3425291475, 3428960932, 3445326185 ], "samples_ts": [ 37.3691, 37.3291, 37.1518 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T00:22:51Z", "avg_ns": 9341627288, "stddev_ns": 371337226, "avg_ts": 13.716248, "stddev_ts": 0.53353, "samples_ns": [ 9163941712, 9768423023, 9092517131 ], "samples_ts": [ 13.9678, 13.1034, 14.0775 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 252 }, { "timestamp_utc": "2025-12-09T00:25:26.893704+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:23:20Z\",\n \"avg_ns\": 3620638555,\n \"stddev_ns\": 335161992,\n \"avg_ts\": 35.545664,\n \"stddev_ts\": 3.123892,\n \"samples_ns\": [ 4007564330, 3420112538, 3434238799 ],\n \"samples_ts\": [ 31.9396, 37.4257, 37.2717 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:23:34Z\",\n \"avg_ns\": 37384249779,\n \"stddev_ns\": 417895062,\n \"avg_ts\": 13.696756,\n \"stddev_ts\": 0.154100,\n \"samples_ns\": [ 37634028604, 36901807417, 37616913318 ],\n \"samples_ts\": [ 13.6047, 13.8747, 13.6109 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:23:20Z", "avg_ns": 3620638555, "stddev_ns": 335161992, "avg_ts": 35.545664, "stddev_ts": 3.123892, "samples_ns": [ 4007564330, 3420112538, 3434238799 ], "samples_ts": [ 31.9396, 37.4257, 37.2717 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T00:23:34Z", "avg_ns": 37384249779, "stddev_ns": 417895062, "avg_ts": 13.696756, "stddev_ts": 0.1541, "samples_ns": [ 37634028604, 36901807417, 37616913318 ], "samples_ts": [ 13.6047, 13.8747, 13.6109 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 253 }, { "timestamp_utc": "2025-12-09T00:26:50.476906+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:25:27Z\",\n \"avg_ns\": 13811003944,\n \"stddev_ns\": 17570736,\n \"avg_ts\": 37.071929,\n \"stddev_ts\": 0.047136,\n \"samples_ns\": [ 13830775229, 13805060279, 13797176325 ],\n \"samples_ts\": [ 37.0189, 37.0878, 37.109 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:26:22Z\",\n \"avg_ns\": 9117491617,\n \"stddev_ns\": 29322747,\n \"avg_ts\": 14.039046,\n \"stddev_ts\": 0.045231,\n \"samples_ns\": [ 9083793054, 9137190382, 9131491416 ],\n \"samples_ts\": [ 14.091, 14.0087, 14.0174 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:25:27Z", "avg_ns": 13811003944, "stddev_ns": 17570736, "avg_ts": 37.071929, "stddev_ts": 0.047136, "samples_ns": [ 13830775229, 13805060279, 13797176325 ], "samples_ts": [ 37.0189, 37.0878, 37.109 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T00:26:22Z", "avg_ns": 9117491617, "stddev_ns": 29322747, "avg_ts": 14.039046, "stddev_ts": 0.045231, "samples_ns": [ 9083793054, 9137190382, 9131491416 ], "samples_ts": [ 14.091, 14.0087, 14.0174 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 254 }, { "timestamp_utc": "2025-12-09T00:29:39.496136+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:26:51Z\",\n \"avg_ns\": 13778237633,\n \"stddev_ns\": 17846811,\n \"avg_ts\": 37.160092,\n \"stddev_ts\": 0.048154,\n \"samples_ns\": [ 13782683652, 13758588506, 13793440742 ],\n \"samples_ts\": [ 37.1481, 37.2131, 37.1191 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:27:46Z\",\n \"avg_ns\": 37660894476,\n \"stddev_ns\": 43159004,\n \"avg_ts\": 13.595016,\n \"stddev_ts\": 0.015585,\n \"samples_ns\": [ 37670847285, 37698207175, 37613628969 ],\n \"samples_ts\": [ 13.5914, 13.5815, 13.6121 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:26:51Z", "avg_ns": 13778237633, "stddev_ns": 17846811, "avg_ts": 37.160092, "stddev_ts": 0.048154, "samples_ns": [ 13782683652, 13758588506, 13793440742 ], "samples_ts": [ 37.1481, 37.2131, 37.1191 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T00:27:46Z", "avg_ns": 37660894476, "stddev_ns": 43159004, "avg_ts": 13.595016, "stddev_ts": 0.015585, "samples_ns": [ 37670847285, 37698207175, 37613628969 ], "samples_ts": [ 13.5914, 13.5815, 13.6121 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 255 }, { "timestamp_utc": "2025-12-09T00:30:21.651320+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:29:40Z\",\n \"avg_ns\": 3429832438,\n \"stddev_ns\": 8304445,\n \"avg_ts\": 37.319753,\n \"stddev_ts\": 0.090471,\n \"samples_ns\": [ 3420336853, 3435734503, 3433425960 ],\n \"samples_ts\": [ 37.4232, 37.2555, 37.2805 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:29:53Z\",\n \"avg_ns\": 9169744921,\n \"stddev_ns\": 38070101,\n \"avg_ts\": 13.959109,\n \"stddev_ts\": 0.058025,\n \"samples_ns\": [ 9203310364, 9128378599, 9177545800 ],\n \"samples_ts\": [ 13.908, 14.0222, 13.9471 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:29:40Z", "avg_ns": 3429832438, "stddev_ns": 8304445, "avg_ts": 37.319753, "stddev_ts": 0.090471, "samples_ns": [ 3420336853, 3435734503, 3433425960 ], "samples_ts": [ 37.4232, 37.2555, 37.2805 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T00:29:53Z", "avg_ns": 9169744921, "stddev_ns": 38070101, "avg_ts": 13.959109, "stddev_ts": 0.058025, "samples_ns": [ 9203310364, 9128378599, 9177545800 ], "samples_ts": [ 13.908, 14.0222, 13.9471 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 256 }, { "timestamp_utc": "2025-12-09T00:32:28.548290+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:30:22Z\",\n \"avg_ns\": 3429073822,\n \"stddev_ns\": 8802524,\n \"avg_ts\": 37.328027,\n \"stddev_ts\": 0.095750,\n \"samples_ns\": [ 3427392670, 3421233508, 3438595290 ],\n \"samples_ts\": [ 37.3462, 37.4134, 37.2245 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:30:36Z\",\n \"avg_ns\": 37239670945,\n \"stddev_ns\": 371964037,\n \"avg_ts\": 13.749699,\n \"stddev_ts\": 0.138126,\n \"samples_ns\": [ 37472081479, 37436269476, 36810661881 ],\n \"samples_ts\": [ 13.6635, 13.6766, 13.909 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:30:22Z", "avg_ns": 3429073822, "stddev_ns": 8802524, "avg_ts": 37.328027, "stddev_ts": 0.09575, "samples_ns": [ 3427392670, 3421233508, 3438595290 ], "samples_ts": [ 37.3462, 37.4134, 37.2245 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T00:30:36Z", "avg_ns": 37239670945, "stddev_ns": 371964037, "avg_ts": 13.749699, "stddev_ts": 0.138126, "samples_ns": [ 37472081479, 37436269476, 36810661881 ], "samples_ts": [ 13.6635, 13.6766, 13.909 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 257 }, { "timestamp_utc": "2025-12-09T00:33:54.714102+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:32:29Z\",\n \"avg_ns\": 13836377036,\n \"stddev_ns\": 6300398,\n \"avg_ts\": 37.003911,\n \"stddev_ts\": 0.016851,\n \"samples_ns\": [ 13837187980, 13829710430, 13842232698 ],\n \"samples_ts\": [ 37.0017, 37.0217, 36.9883 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:33:25Z\",\n \"avg_ns\": 9779473564,\n \"stddev_ns\": 359072835,\n \"avg_ts\": 13.100169,\n \"stddev_ts\": 0.471042,\n \"samples_ns\": [ 9581042750, 10193970117, 9563407826 ],\n \"samples_ts\": [ 13.3597, 12.5564, 13.3844 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:32:29Z", "avg_ns": 13836377036, "stddev_ns": 6300398, "avg_ts": 37.003911, "stddev_ts": 0.016851, "samples_ns": [ 13837187980, 13829710430, 13842232698 ], "samples_ts": [ 37.0017, 37.0217, 36.9883 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T00:33:25Z", "avg_ns": 9779473564, "stddev_ns": 359072835, "avg_ts": 13.100169, "stddev_ts": 0.471042, "samples_ns": [ 9581042750, 10193970117, 9563407826 ], "samples_ts": [ 13.3597, 12.5564, 13.3844 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 258 }, { "timestamp_utc": "2025-12-09T00:36:43.842221+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:33:55Z\",\n \"avg_ns\": 14320724162,\n \"stddev_ns\": 302581303,\n \"avg_ts\": 35.762898,\n \"stddev_ts\": 0.746575,\n \"samples_ns\": [ 14156671342, 14669903003, 14135598143 ],\n \"samples_ts\": [ 36.1667, 34.9014, 36.2206 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:34:53Z\",\n \"avg_ns\": 36831170653,\n \"stddev_ns\": 62882365,\n \"avg_ts\": 13.901296,\n \"stddev_ts\": 0.023728,\n \"samples_ns\": [ 36896626889, 36825658877, 36771226195 ],\n \"samples_ts\": [ 13.8766, 13.9033, 13.9239 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:33:55Z", "avg_ns": 14320724162, "stddev_ns": 302581303, "avg_ts": 35.762898, "stddev_ts": 0.746575, "samples_ns": [ 14156671342, 14669903003, 14135598143 ], "samples_ts": [ 36.1667, 34.9014, 36.2206 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T00:34:53Z", "avg_ns": 36831170653, "stddev_ns": 62882365, "avg_ts": 13.901296, "stddev_ts": 0.023728, "samples_ns": [ 36896626889, 36825658877, 36771226195 ], "samples_ts": [ 13.8766, 13.9033, 13.9239 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 259 }, { "timestamp_utc": "2025-12-09T00:37:27.166009+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:36:44Z\",\n \"avg_ns\": 3628845450,\n \"stddev_ns\": 316460952,\n \"avg_ts\": 35.444028,\n \"stddev_ts\": 2.943026,\n \"samples_ns\": [ 3451665631, 3994207856, 3440662864 ],\n \"samples_ts\": [ 37.0835, 32.0464, 37.2021 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:36:58Z\",\n \"avg_ns\": 9354416654,\n \"stddev_ns\": 330446033,\n \"avg_ts\": 13.694541,\n \"stddev_ts\": 0.474108,\n \"samples_ns\": [ 9169720599, 9735918772, 9157610593 ],\n \"samples_ts\": [ 13.959, 13.1472, 13.9774 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:36:44Z", "avg_ns": 3628845450, "stddev_ns": 316460952, "avg_ts": 35.444028, "stddev_ts": 2.943026, "samples_ns": [ 3451665631, 3994207856, 3440662864 ], "samples_ts": [ 37.0835, 32.0464, 37.2021 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T00:36:58Z", "avg_ns": 9354416654, "stddev_ns": 330446033, "avg_ts": 13.694541, "stddev_ts": 0.474108, "samples_ns": [ 9169720599, 9735918772, 9157610593 ], "samples_ts": [ 13.959, 13.1472, 13.9774 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 260 }, { "timestamp_utc": "2025-12-09T00:39:35.349252+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:37:27Z\",\n \"avg_ns\": 3610594558,\n \"stddev_ns\": 324238537,\n \"avg_ts\": 35.633396,\n \"stddev_ts\": 3.042915,\n \"samples_ns\": [ 3413845405, 3984827704, 3433110567 ],\n \"samples_ts\": [ 37.4944, 32.1218, 37.284 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:37:42Z\",\n \"avg_ns\": 37666696924,\n \"stddev_ns\": 380826102,\n \"avg_ts\": 13.593831,\n \"stddev_ts\": 0.136699,\n \"samples_ns\": [ 38102938008, 37496522266, 37400630498 ],\n \"samples_ts\": [ 13.4373, 13.6546, 13.6896 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:37:27Z", "avg_ns": 3610594558, "stddev_ns": 324238537, "avg_ts": 35.633396, "stddev_ts": 3.042915, "samples_ns": [ 3413845405, 3984827704, 3433110567 ], "samples_ts": [ 37.4944, 32.1218, 37.284 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T00:37:42Z", "avg_ns": 37666696924, "stddev_ns": 380826102, "avg_ts": 13.593831, "stddev_ts": 0.136699, "samples_ns": [ 38102938008, 37496522266, 37400630498 ], "samples_ts": [ 13.4373, 13.6546, 13.6896 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 261 }, { "timestamp_utc": "2025-12-09T00:41:01.426950+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:39:36Z\",\n \"avg_ns\": 13994182500,\n \"stddev_ns\": 11119395,\n \"avg_ts\": 36.586647,\n \"stddev_ts\": 0.029070,\n \"samples_ns\": [ 14005113878, 13994548252, 13982885371 ],\n \"samples_ts\": [ 36.5581, 36.5857, 36.6162 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:40:32Z\",\n \"avg_ns\": 9530442067,\n \"stddev_ns\": 56417366,\n \"avg_ts\": 13.430959,\n \"stddev_ts\": 0.079237,\n \"samples_ns\": [ 9595569969, 9499176770, 9496579462 ],\n \"samples_ts\": [ 13.3395, 13.4749, 13.4785 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:39:36Z", "avg_ns": 13994182500, "stddev_ns": 11119395, "avg_ts": 36.586647, "stddev_ts": 0.02907, "samples_ns": [ 14005113878, 13994548252, 13982885371 ], "samples_ts": [ 36.5581, 36.5857, 36.6162 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T00:40:32Z", "avg_ns": 9530442067, "stddev_ns": 56417366, "avg_ts": 13.430959, "stddev_ts": 0.079237, "samples_ns": [ 9595569969, 9499176770, 9496579462 ], "samples_ts": [ 13.3395, 13.4749, 13.4785 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 262 }, { "timestamp_utc": "2025-12-09T00:43:51.987760+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:41:02Z\",\n \"avg_ns\": 13957167273,\n \"stddev_ns\": 21616399,\n \"avg_ts\": 36.683720,\n \"stddev_ts\": 0.056831,\n \"samples_ns\": [ 13934193430, 13977103615, 13960204776 ],\n \"samples_ts\": [ 36.7441, 36.6313, 36.6757 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:41:58Z\",\n \"avg_ns\": 37919115560,\n \"stddev_ns\": 447976689,\n \"avg_ts\": 13.503688,\n \"stddev_ts\": 0.160461,\n \"samples_ns\": [ 38256793203, 37410919300, 38089634177 ],\n \"samples_ts\": [ 13.3832, 13.6858, 13.442 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:41:02Z", "avg_ns": 13957167273, "stddev_ns": 21616399, "avg_ts": 36.68372, "stddev_ts": 0.056831, "samples_ns": [ 13934193430, 13977103615, 13960204776 ], "samples_ts": [ 36.7441, 36.6313, 36.6757 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T00:41:58Z", "avg_ns": 37919115560, "stddev_ns": 447976689, "avg_ts": 13.503688, "stddev_ts": 0.160461, "samples_ns": [ 38256793203, 37410919300, 38089634177 ], "samples_ts": [ 13.3832, 13.6858, 13.442 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 263 }, { "timestamp_utc": "2025-12-09T00:44:33.948747+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:43:52Z\",\n \"avg_ns\": 3426429465,\n \"stddev_ns\": 7777819,\n \"avg_ts\": 37.356800,\n \"stddev_ts\": 0.084833,\n \"samples_ns\": [ 3418191941, 3433646362, 3427450093 ],\n \"samples_ts\": [ 37.4467, 37.2782, 37.3455 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:44:06Z\",\n \"avg_ns\": 9113973866,\n \"stddev_ns\": 34155207,\n \"avg_ts\": 14.044499,\n \"stddev_ts\": 0.052581,\n \"samples_ns\": [ 9107847226, 9150777627, 9083296746 ],\n \"samples_ts\": [ 14.0538, 13.9879, 14.0918 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:43:52Z", "avg_ns": 3426429465, "stddev_ns": 7777819, "avg_ts": 37.3568, "stddev_ts": 0.084833, "samples_ns": [ 3418191941, 3433646362, 3427450093 ], "samples_ts": [ 37.4467, 37.2782, 37.3455 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T00:44:06Z", "avg_ns": 9113973866, "stddev_ns": 34155207, "avg_ts": 14.044499, "stddev_ts": 0.052581, "samples_ns": [ 9107847226, 9150777627, 9083296746 ], "samples_ts": [ 14.0538, 13.9879, 14.0918 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 264 }, { "timestamp_utc": "2025-12-09T00:46:40.409480+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:44:34Z\",\n \"avg_ns\": 3428627713,\n \"stddev_ns\": 632683,\n \"avg_ts\": 37.332721,\n \"stddev_ts\": 0.006889,\n \"samples_ns\": [ 3429239601, 3428667433, 3427976105 ],\n \"samples_ts\": [ 37.3261, 37.3323, 37.3398 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:44:48Z\",\n \"avg_ns\": 37269632663,\n \"stddev_ns\": 773168103,\n \"avg_ts\": 13.741623,\n \"stddev_ts\": 0.281802,\n \"samples_ns\": [ 36887616819, 38159451549, 36761829623 ],\n \"samples_ts\": [ 13.88, 13.4174, 13.9275 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:44:34Z", "avg_ns": 3428627713, "stddev_ns": 632683, "avg_ts": 37.332721, "stddev_ts": 0.006889, "samples_ns": [ 3429239601, 3428667433, 3427976105 ], "samples_ts": [ 37.3261, 37.3323, 37.3398 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T00:44:48Z", "avg_ns": 37269632663, "stddev_ns": 773168103, "avg_ts": 13.741623, "stddev_ts": 0.281802, "samples_ns": [ 36887616819, 38159451549, 36761829623 ], "samples_ts": [ 13.88, 13.4174, 13.9275 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 265 }, { "timestamp_utc": "2025-12-09T00:48:05.660087+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:46:41Z\",\n \"avg_ns\": 14148334569,\n \"stddev_ns\": 299226919,\n \"avg_ts\": 36.198931,\n \"stddev_ts\": 0.774929,\n \"samples_ns\": [ 13803299993, 14336666122, 14305037593 ],\n \"samples_ts\": [ 37.0926, 35.7126, 35.7916 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:47:37Z\",\n \"avg_ns\": 9356037042,\n \"stddev_ns\": 345922165,\n \"avg_ts\": 13.693240,\n \"stddev_ts\": 0.496334,\n \"samples_ns\": [ 9752904912, 9118436204, 9196770012 ],\n \"samples_ts\": [ 13.1243, 14.0375, 13.9179 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:46:41Z", "avg_ns": 14148334569, "stddev_ns": 299226919, "avg_ts": 36.198931, "stddev_ts": 0.774929, "samples_ns": [ 13803299993, 14336666122, 14305037593 ], "samples_ts": [ 37.0926, 35.7126, 35.7916 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T00:47:37Z", "avg_ns": 9356037042, "stddev_ns": 345922165, "avg_ts": 13.69324, "stddev_ts": 0.496334, "samples_ns": [ 9752904912, 9118436204, 9196770012 ], "samples_ts": [ 13.1243, 14.0375, 13.9179 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 266 }, { "timestamp_utc": "2025-12-09T00:50:55.206314+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:48:06Z\",\n \"avg_ns\": 14172796470,\n \"stddev_ns\": 335137892,\n \"avg_ts\": 36.138852,\n \"stddev_ts\": 0.844150,\n \"samples_ns\": [ 14555724354, 14029727463, 13932937594 ],\n \"samples_ts\": [ 35.1752, 36.4939, 36.7475 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:49:03Z\",\n \"avg_ns\": 37202660192,\n \"stddev_ns\": 369124203,\n \"avg_ts\": 13.763365,\n \"stddev_ts\": 0.137334,\n \"samples_ns\": [ 37393480652, 37437315625, 36777184299 ],\n \"samples_ts\": [ 13.6922, 13.6762, 13.9217 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:48:06Z", "avg_ns": 14172796470, "stddev_ns": 335137892, "avg_ts": 36.138852, "stddev_ts": 0.84415, "samples_ns": [ 14555724354, 14029727463, 13932937594 ], "samples_ts": [ 35.1752, 36.4939, 36.7475 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T00:49:03Z", "avg_ns": 37202660192, "stddev_ns": 369124203, "avg_ts": 13.763365, "stddev_ts": 0.137334, "samples_ns": [ 37393480652, 37437315625, 36777184299 ], "samples_ts": [ 13.6922, 13.6762, 13.9217 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 267 }, { "timestamp_utc": "2025-12-09T00:51:38.377033+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:50:55Z\",\n \"avg_ns\": 3604853641,\n \"stddev_ns\": 311477405,\n \"avg_ts\": 35.676817,\n \"stddev_ts\": 2.936184,\n \"samples_ns\": [ 3964516089, 3425627943, 3424416891 ],\n \"samples_ts\": [ 32.2864, 37.3654, 37.3786 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:51:10Z\",\n \"avg_ns\": 9337239487,\n \"stddev_ns\": 384042745,\n \"avg_ts\": 13.723667,\n \"stddev_ts\": 0.551383,\n \"samples_ns\": [ 9108896262, 9780627379, 9122194820 ],\n \"samples_ts\": [ 14.0522, 13.0871, 14.0317 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:50:55Z", "avg_ns": 3604853641, "stddev_ns": 311477405, "avg_ts": 35.676817, "stddev_ts": 2.936184, "samples_ns": [ 3964516089, 3425627943, 3424416891 ], "samples_ts": [ 32.2864, 37.3654, 37.3786 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T00:51:10Z", "avg_ns": 9337239487, "stddev_ns": 384042745, "avg_ts": 13.723667, "stddev_ts": 0.551383, "samples_ns": [ 9108896262, 9780627379, 9122194820 ], "samples_ts": [ 14.0522, 13.0871, 14.0317 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 268 }, { "timestamp_utc": "2025-12-09T00:53:46.737919+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:51:39Z\",\n \"avg_ns\": 3462771066,\n \"stddev_ns\": 10081293,\n \"avg_ts\": 36.964824,\n \"stddev_ts\": 0.107622,\n \"samples_ns\": [ 3452627543, 3462897051, 3472788605 ],\n \"samples_ts\": [ 37.0732, 36.9633, 36.858 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:51:53Z\",\n \"avg_ns\": 37853294608,\n \"stddev_ns\": 330821695,\n \"avg_ts\": 13.526596,\n \"stddev_ts\": 0.118816,\n \"samples_ns\": [ 38043322711, 38045264849, 37471296264 ],\n \"samples_ts\": [ 13.4583, 13.4577, 13.6638 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:51:39Z", "avg_ns": 3462771066, "stddev_ns": 10081293, "avg_ts": 36.964824, "stddev_ts": 0.107622, "samples_ns": [ 3452627543, 3462897051, 3472788605 ], "samples_ts": [ 37.0732, 36.9633, 36.858 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T00:51:53Z", "avg_ns": 37853294608, "stddev_ns": 330821695, "avg_ts": 13.526596, "stddev_ts": 0.118816, "samples_ns": [ 38043322711, 38045264849, 37471296264 ], "samples_ts": [ 13.4583, 13.4577, 13.6638 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 269 }, { "timestamp_utc": "2025-12-09T00:55:11.066041+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:53:47Z\",\n \"avg_ns\": 14042010903,\n \"stddev_ns\": 334861532,\n \"avg_ts\": 36.475655,\n \"stddev_ts\": 0.858036,\n \"samples_ns\": [ 14428639992, 13853245300, 13844147418 ],\n \"samples_ts\": [ 35.485, 36.9588, 36.9831 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:54:43Z\",\n \"avg_ns\": 9132129622,\n \"stddev_ns\": 15015540,\n \"avg_ts\": 14.016471,\n \"stddev_ts\": 0.023064,\n \"samples_ns\": [ 9115186221, 9143786947, 9137415699 ],\n \"samples_ts\": [ 14.0425, 13.9986, 14.0083 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:53:47Z", "avg_ns": 14042010903, "stddev_ns": 334861532, "avg_ts": 36.475655, "stddev_ts": 0.858036, "samples_ns": [ 14428639992, 13853245300, 13844147418 ], "samples_ts": [ 35.485, 36.9588, 36.9831 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T00:54:43Z", "avg_ns": 9132129622, "stddev_ns": 15015540, "avg_ts": 14.016471, "stddev_ts": 0.023064, "samples_ns": [ 9115186221, 9143786947, 9137415699 ], "samples_ts": [ 14.0425, 13.9986, 14.0083 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 270 }, { "timestamp_utc": "2025-12-09T00:58:00.809803+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:55:11Z\",\n \"avg_ns\": 14246757159,\n \"stddev_ns\": 345131780,\n \"avg_ts\": 35.952267,\n \"stddev_ts\": 0.883237,\n \"samples_ns\": [ 14458392513, 13848494951, 14433384013 ],\n \"samples_ts\": [ 35.412, 36.9715, 35.4733 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:56:08Z\",\n \"avg_ns\": 37413488373,\n \"stddev_ns\": 635468246,\n \"avg_ts\": 13.687543,\n \"stddev_ts\": 0.233058,\n \"samples_ns\": [ 36750715717, 38017581865, 37472167537 ],\n \"samples_ts\": [ 13.9317, 13.4675, 13.6635 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:55:11Z", "avg_ns": 14246757159, "stddev_ns": 345131780, "avg_ts": 35.952267, "stddev_ts": 0.883237, "samples_ns": [ 14458392513, 13848494951, 14433384013 ], "samples_ts": [ 35.412, 36.9715, 35.4733 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T00:56:08Z", "avg_ns": 37413488373, "stddev_ns": 635468246, "avg_ts": 13.687543, "stddev_ts": 0.233058, "samples_ns": [ 36750715717, 38017581865, 37472167537 ], "samples_ts": [ 13.9317, 13.4675, 13.6635 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 271 }, { "timestamp_utc": "2025-12-09T00:58:43.581776+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:58:01Z\",\n \"avg_ns\": 3434460273,\n \"stddev_ns\": 9571829,\n \"avg_ts\": 37.269513,\n \"stddev_ts\": 0.103883,\n \"samples_ns\": [ 3434829347, 3443841869, 3424709605 ],\n \"samples_ts\": [ 37.2653, 37.1678, 37.3754 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:58:15Z\",\n \"avg_ns\": 9357924478,\n \"stddev_ns\": 347141792,\n \"avg_ts\": 13.690545,\n \"stddev_ts\": 0.497285,\n \"samples_ns\": [ 9144734781, 9170546489, 9758492165 ],\n \"samples_ts\": [ 13.9971, 13.9577, 13.1168 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:58:01Z", "avg_ns": 3434460273, "stddev_ns": 9571829, "avg_ts": 37.269513, "stddev_ts": 0.103883, "samples_ns": [ 3434829347, 3443841869, 3424709605 ], "samples_ts": [ 37.2653, 37.1678, 37.3754 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T00:58:15Z", "avg_ns": 9357924478, "stddev_ns": 347141792, "avg_ts": 13.690545, "stddev_ts": 0.497285, "samples_ns": [ 9144734781, 9170546489, 9758492165 ], "samples_ts": [ 13.9971, 13.9577, 13.1168 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 272 }, { "timestamp_utc": "2025-12-09T01:00:51.979978+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:58:44Z\",\n \"avg_ns\": 3620762822,\n \"stddev_ns\": 333456925,\n \"avg_ts\": 35.542535,\n \"stddev_ts\": 3.108787,\n \"samples_ns\": [ 3418431963, 4005636168, 3438220337 ],\n \"samples_ts\": [ 37.4441, 31.955, 37.2286 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:58:58Z\",\n \"avg_ns\": 37724853277,\n \"stddev_ns\": 367852647,\n \"avg_ts\": 13.572810,\n \"stddev_ts\": 0.131644,\n \"samples_ns\": [ 37475035545, 37552257825, 38147266463 ],\n \"samples_ts\": [ 13.6624, 13.6343, 13.4217 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T00:58:44Z", "avg_ns": 3620762822, "stddev_ns": 333456925, "avg_ts": 35.542535, "stddev_ts": 3.108787, "samples_ns": [ 3418431963, 4005636168, 3438220337 ], "samples_ts": [ 37.4441, 31.955, 37.2286 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T00:58:58Z", "avg_ns": 37724853277, "stddev_ns": 367852647, "avg_ts": 13.57281, "stddev_ts": 0.131644, "samples_ns": [ 37475035545, 37552257825, 38147266463 ], "samples_ts": [ 13.6624, 13.6343, 13.4217 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 273 }, { "timestamp_utc": "2025-12-09T01:02:16.879209+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:00:52Z\",\n \"avg_ns\": 14192808569,\n \"stddev_ns\": 310783506,\n \"avg_ts\": 36.086000,\n \"stddev_ts\": 0.780408,\n \"samples_ns\": [ 14551317627, 13999777202, 14027330880 ],\n \"samples_ts\": [ 35.1858, 36.572, 36.5002 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:01:49Z\",\n \"avg_ns\": 9122748767,\n \"stddev_ns\": 29190988,\n \"avg_ts\": 14.030955,\n \"stddev_ts\": 0.044947,\n \"samples_ns\": [ 9090623291, 9147647746, 9129975264 ],\n \"samples_ts\": [ 14.0804, 13.9927, 14.0198 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:00:52Z", "avg_ns": 14192808569, "stddev_ns": 310783506, "avg_ts": 36.086, "stddev_ts": 0.780408, "samples_ns": [ 14551317627, 13999777202, 14027330880 ], "samples_ts": [ 35.1858, 36.572, 36.5002 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T01:01:49Z", "avg_ns": 9122748767, "stddev_ns": 29190988, "avg_ts": 14.030955, "stddev_ts": 0.044947, "samples_ns": [ 9090623291, 9147647746, 9129975264 ], "samples_ts": [ 14.0804, 13.9927, 14.0198 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 274 }, { "timestamp_utc": "2025-12-09T01:05:04.376278+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:02:17Z\",\n \"avg_ns\": 13965429996,\n \"stddev_ns\": 16407068,\n \"avg_ts\": 36.661991,\n \"stddev_ts\": 0.043100,\n \"samples_ns\": [ 13946504605, 13974151002, 13975634382 ],\n \"samples_ts\": [ 36.7117, 36.6391, 36.6352 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:03:13Z\",\n \"avg_ns\": 36881177735,\n \"stddev_ns\": 34664877,\n \"avg_ts\": 13.882428,\n \"stddev_ts\": 0.013042,\n \"samples_ns\": [ 36920684695, 36866993545, 36855854966 ],\n \"samples_ts\": [ 13.8676, 13.8878, 13.892 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:02:17Z", "avg_ns": 13965429996, "stddev_ns": 16407068, "avg_ts": 36.661991, "stddev_ts": 0.0431, "samples_ns": [ 13946504605, 13974151002, 13975634382 ], "samples_ts": [ 36.7117, 36.6391, 36.6352 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T01:03:13Z", "avg_ns": 36881177735, "stddev_ns": 34664877, "avg_ts": 13.882428, "stddev_ts": 0.013042, "samples_ns": [ 36920684695, 36866993545, 36855854966 ], "samples_ts": [ 13.8676, 13.8878, 13.892 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 275 }, { "timestamp_utc": "2025-12-09T01:05:46.614492+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:05:05Z\",\n \"avg_ns\": 3434047166,\n \"stddev_ns\": 5665761,\n \"avg_ts\": 37.273871,\n \"stddev_ts\": 0.061519,\n \"samples_ns\": [ 3434995002, 3439178926, 3427967571 ],\n \"samples_ts\": [ 37.2635, 37.2182, 37.3399 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:05:18Z\",\n \"avg_ns\": 9186503243,\n \"stddev_ns\": 25311275,\n \"avg_ts\": 13.933555,\n \"stddev_ts\": 0.038340,\n \"samples_ns\": [ 9215192427, 9167325567, 9176991735 ],\n \"samples_ts\": [ 13.8901, 13.9626, 13.9479 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:05:05Z", "avg_ns": 3434047166, "stddev_ns": 5665761, "avg_ts": 37.273871, "stddev_ts": 0.061519, "samples_ns": [ 3434995002, 3439178926, 3427967571 ], "samples_ts": [ 37.2635, 37.2182, 37.3399 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T01:05:18Z", "avg_ns": 9186503243, "stddev_ns": 25311275, "avg_ts": 13.933555, "stddev_ts": 0.03834, "samples_ns": [ 9215192427, 9167325567, 9176991735 ], "samples_ts": [ 13.8901, 13.9626, 13.9479 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 276 }, { "timestamp_utc": "2025-12-09T01:07:51.886800+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:05:47Z\",\n \"avg_ns\": 3443693945,\n \"stddev_ns\": 14472987,\n \"avg_ts\": 37.169826,\n \"stddev_ts\": 0.155850,\n \"samples_ns\": [ 3460339373, 3434082346, 3436660117 ],\n \"samples_ts\": [ 36.9906, 37.2734, 37.2455 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:06:01Z\",\n \"avg_ns\": 36847979393,\n \"stddev_ns\": 65578635,\n \"avg_ts\": 13.894957,\n \"stddev_ts\": 0.024748,\n \"samples_ns\": [ 36900081751, 36774341149, 36869515280 ],\n \"samples_ts\": [ 13.8753, 13.9228, 13.8868 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:05:47Z", "avg_ns": 3443693945, "stddev_ns": 14472987, "avg_ts": 37.169826, "stddev_ts": 0.15585, "samples_ns": [ 3460339373, 3434082346, 3436660117 ], "samples_ts": [ 36.9906, 37.2734, 37.2455 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T01:06:01Z", "avg_ns": 36847979393, "stddev_ns": 65578635, "avg_ts": 13.894957, "stddev_ts": 0.024748, "samples_ns": [ 36900081751, 36774341149, 36869515280 ], "samples_ts": [ 13.8753, 13.9228, 13.8868 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 277 }, { "timestamp_utc": "2025-12-09T01:09:15.470728+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:07:52Z\",\n \"avg_ns\": 13794633052,\n \"stddev_ns\": 8754928,\n \"avg_ts\": 37.115894,\n \"stddev_ts\": 0.023558,\n \"samples_ns\": [ 13784824147, 13797425583, 13801649428 ],\n \"samples_ts\": [ 37.1423, 37.1084, 37.097 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:08:47Z\",\n \"avg_ns\": 9141718047,\n \"stddev_ns\": 35561439,\n \"avg_ts\": 14.001886,\n \"stddev_ts\": 0.054577,\n \"samples_ns\": [ 9156534345, 9167475616, 9101144181 ],\n \"samples_ts\": [ 13.9791, 13.9624, 14.0642 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:07:52Z", "avg_ns": 13794633052, "stddev_ns": 8754928, "avg_ts": 37.115894, "stddev_ts": 0.023558, "samples_ns": [ 13784824147, 13797425583, 13801649428 ], "samples_ts": [ 37.1423, 37.1084, 37.097 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T01:08:47Z", "avg_ns": 9141718047, "stddev_ns": 35561439, "avg_ts": 14.001886, "stddev_ts": 0.054577, "samples_ns": [ 9156534345, 9167475616, 9101144181 ], "samples_ts": [ 13.9791, 13.9624, 14.0642 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 278 }, { "timestamp_utc": "2025-12-09T01:12:02.038010+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:09:16Z\",\n \"avg_ns\": 13783841080,\n \"stddev_ns\": 23193088,\n \"avg_ts\": 37.145014,\n \"stddev_ts\": 0.062454,\n \"samples_ns\": [ 13809890125, 13765432226, 13776200890 ],\n \"samples_ts\": [ 37.0749, 37.1946, 37.1655 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:10:11Z\",\n \"avg_ns\": 36832779855,\n \"stddev_ns\": 4603641,\n \"avg_ts\": 13.900662,\n \"stddev_ts\": 0.001736,\n \"samples_ns\": [ 36833204993, 36837152167, 36827982406 ],\n \"samples_ts\": [ 13.9005, 13.899, 13.9025 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:09:16Z", "avg_ns": 13783841080, "stddev_ns": 23193088, "avg_ts": 37.145014, "stddev_ts": 0.062454, "samples_ns": [ 13809890125, 13765432226, 13776200890 ], "samples_ts": [ 37.0749, 37.1946, 37.1655 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T01:10:11Z", "avg_ns": 36832779855, "stddev_ns": 4603641, "avg_ts": 13.900662, "stddev_ts": 0.001736, "samples_ns": [ 36833204993, 36837152167, 36827982406 ], "samples_ts": [ 13.9005, 13.899, 13.9025 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 279 }, { "timestamp_utc": "2025-12-09T01:12:44.330736+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:12:02Z\",\n \"avg_ns\": 3430598501,\n \"stddev_ns\": 3737415,\n \"avg_ts\": 37.311303,\n \"stddev_ts\": 0.040618,\n \"samples_ns\": [ 3428515012, 3434912729, 3428367763 ],\n \"samples_ts\": [ 37.3339, 37.2644, 37.3356 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:12:16Z\",\n \"avg_ns\": 9214361392,\n \"stddev_ns\": 16349437,\n \"avg_ts\": 13.891388,\n \"stddev_ts\": 0.024625,\n \"samples_ns\": [ 9232998088, 9202435349, 9207650740 ],\n \"samples_ts\": [ 13.8633, 13.9094, 13.9015 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:12:02Z", "avg_ns": 3430598501, "stddev_ns": 3737415, "avg_ts": 37.311303, "stddev_ts": 0.040618, "samples_ns": [ 3428515012, 3434912729, 3428367763 ], "samples_ts": [ 37.3339, 37.2644, 37.3356 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T01:12:16Z", "avg_ns": 9214361392, "stddev_ns": 16349437, "avg_ts": 13.891388, "stddev_ts": 0.024625, "samples_ns": [ 9232998088, 9202435349, 9207650740 ], "samples_ts": [ 13.8633, 13.9094, 13.9015 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 280 }, { "timestamp_utc": "2025-12-09T01:14:49.780791+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:12:45Z\",\n \"avg_ns\": 3431007219,\n \"stddev_ns\": 17041035,\n \"avg_ts\": 37.307441,\n \"stddev_ts\": 0.184932,\n \"samples_ns\": [ 3449977935, 3416996286, 3426047436 ],\n \"samples_ts\": [ 37.1017, 37.4598, 37.3608 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:12:58Z\",\n \"avg_ns\": 36936751949,\n \"stddev_ns\": 45648517,\n \"avg_ts\": 13.861547,\n \"stddev_ts\": 0.017142,\n \"samples_ns\": [ 36970129530, 36884732719, 36955393598 ],\n \"samples_ts\": [ 13.849, 13.8811, 13.8545 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:12:45Z", "avg_ns": 3431007219, "stddev_ns": 17041035, "avg_ts": 37.307441, "stddev_ts": 0.184932, "samples_ns": [ 3449977935, 3416996286, 3426047436 ], "samples_ts": [ 37.1017, 37.4598, 37.3608 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T01:12:58Z", "avg_ns": 36936751949, "stddev_ns": 45648517, "avg_ts": 13.861547, "stddev_ts": 0.017142, "samples_ns": [ 36970129530, 36884732719, 36955393598 ], "samples_ts": [ 13.849, 13.8811, 13.8545 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 281 }, { "timestamp_utc": "2025-12-09T01:16:15.136571+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:14:50Z\",\n \"avg_ns\": 13880167411,\n \"stddev_ns\": 34555530,\n \"avg_ts\": 36.887316,\n \"stddev_ts\": 0.091957,\n \"samples_ns\": [ 13840557540, 13904141069, 13895803624 ],\n \"samples_ts\": [ 36.9927, 36.8236, 36.8457 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:15:46Z\",\n \"avg_ns\": 9633197290,\n \"stddev_ns\": 24432084,\n \"avg_ts\": 13.287442,\n \"stddev_ts\": 0.033667,\n \"samples_ns\": [ 9612972328, 9660342983, 9626276560 ],\n \"samples_ts\": [ 13.3153, 13.25, 13.2969 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:14:50Z", "avg_ns": 13880167411, "stddev_ns": 34555530, "avg_ts": 36.887316, "stddev_ts": 0.091957, "samples_ns": [ 13840557540, 13904141069, 13895803624 ], "samples_ts": [ 36.9927, 36.8236, 36.8457 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T01:15:46Z", "avg_ns": 9633197290, "stddev_ns": 24432084, "avg_ts": 13.287442, "stddev_ts": 0.033667, "samples_ns": [ 9612972328, 9660342983, 9626276560 ], "samples_ts": [ 13.3153, 13.25, 13.2969 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 282 }, { "timestamp_utc": "2025-12-09T01:19:02.695543+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:16:15Z\",\n \"avg_ns\": 13853454505,\n \"stddev_ns\": 5514314,\n \"avg_ts\": 36.958295,\n \"stddev_ts\": 0.014714,\n \"samples_ns\": [ 13856402152, 13856868544, 13847092819 ],\n \"samples_ts\": [ 36.9504, 36.9492, 36.9753 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:17:11Z\",\n \"avg_ns\": 37063611721,\n \"stddev_ns\": 110634966,\n \"avg_ts\": 13.814170,\n \"stddev_ts\": 0.041201,\n \"samples_ns\": [ 37042404956, 37183315061, 36965115146 ],\n \"samples_ts\": [ 13.822, 13.7696, 13.8509 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:16:15Z", "avg_ns": 13853454505, "stddev_ns": 5514314, "avg_ts": 36.958295, "stddev_ts": 0.014714, "samples_ns": [ 13856402152, 13856868544, 13847092819 ], "samples_ts": [ 36.9504, 36.9492, 36.9753 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T01:17:11Z", "avg_ns": 37063611721, "stddev_ns": 110634966, "avg_ts": 13.81417, "stddev_ts": 0.041201, "samples_ns": [ 37042404956, 37183315061, 36965115146 ], "samples_ts": [ 13.822, 13.7696, 13.8509 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 283 }, { "timestamp_utc": "2025-12-09T01:19:44.927094+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:19:03Z\",\n \"avg_ns\": 3436291611,\n \"stddev_ns\": 6712702,\n \"avg_ts\": 37.249553,\n \"stddev_ts\": 0.072686,\n \"samples_ns\": [ 3443957134, 3433450481, 3431467220 ],\n \"samples_ts\": [ 37.1665, 37.2803, 37.3018 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:19:17Z\",\n \"avg_ns\": 9182592447,\n \"stddev_ns\": 23952333,\n \"avg_ts\": 13.939482,\n \"stddev_ts\": 0.036415,\n \"samples_ns\": [ 9154942351, 9196973583, 9195861408 ],\n \"samples_ts\": [ 13.9815, 13.9176, 13.9193 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:19:03Z", "avg_ns": 3436291611, "stddev_ns": 6712702, "avg_ts": 37.249553, "stddev_ts": 0.072686, "samples_ns": [ 3443957134, 3433450481, 3431467220 ], "samples_ts": [ 37.1665, 37.2803, 37.3018 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T01:19:17Z", "avg_ns": 9182592447, "stddev_ns": 23952333, "avg_ts": 13.939482, "stddev_ts": 0.036415, "samples_ns": [ 9154942351, 9196973583, 9195861408 ], "samples_ts": [ 13.9815, 13.9176, 13.9193 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 284 }, { "timestamp_utc": "2025-12-09T01:21:51.104749+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:19:45Z\",\n \"avg_ns\": 3424464521,\n \"stddev_ns\": 9379916,\n \"avg_ts\": 37.378293,\n \"stddev_ts\": 0.102328,\n \"samples_ns\": [ 3434404939, 3423219081, 3415769543 ],\n \"samples_ts\": [ 37.2699, 37.3917, 37.4733 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:19:59Z\",\n \"avg_ns\": 37182524409,\n \"stddev_ns\": 18320823,\n \"avg_ts\": 13.769912,\n \"stddev_ts\": 0.006784,\n \"samples_ns\": [ 37183929098, 37200100423, 37163543708 ],\n \"samples_ts\": [ 13.7694, 13.7634, 13.7769 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:19:45Z", "avg_ns": 3424464521, "stddev_ns": 9379916, "avg_ts": 37.378293, "stddev_ts": 0.102328, "samples_ns": [ 3434404939, 3423219081, 3415769543 ], "samples_ts": [ 37.2699, 37.3917, 37.4733 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T01:19:59Z", "avg_ns": 37182524409, "stddev_ns": 18320823, "avg_ts": 13.769912, "stddev_ts": 0.006784, "samples_ns": [ 37183929098, 37200100423, 37163543708 ], "samples_ts": [ 13.7694, 13.7634, 13.7769 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 285 }, { "timestamp_utc": "2025-12-09T01:23:15.754808+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:21:51Z\",\n \"avg_ns\": 14049957085,\n \"stddev_ns\": 48840951,\n \"avg_ts\": 36.441686,\n \"stddev_ts\": 0.126500,\n \"samples_ns\": [ 14035294213, 14104449677, 14010127366 ],\n \"samples_ts\": [ 36.4795, 36.3006, 36.545 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:22:48Z\",\n \"avg_ns\": 9177685697,\n \"stddev_ns\": 41877588,\n \"avg_ts\": 13.947065,\n \"stddev_ts\": 0.063807,\n \"samples_ns\": [ 9200418222, 9203280991, 9129357878 ],\n \"samples_ts\": [ 13.9124, 13.9081, 14.0207 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:21:51Z", "avg_ns": 14049957085, "stddev_ns": 48840951, "avg_ts": 36.441686, "stddev_ts": 0.1265, "samples_ns": [ 14035294213, 14104449677, 14010127366 ], "samples_ts": [ 36.4795, 36.3006, 36.545 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T01:22:48Z", "avg_ns": 9177685697, "stddev_ns": 41877588, "avg_ts": 13.947065, "stddev_ts": 0.063807, "samples_ns": [ 9200418222, 9203280991, 9129357878 ], "samples_ts": [ 13.9124, 13.9081, 14.0207 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 286 }, { "timestamp_utc": "2025-12-09T01:26:03.150132+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:23:16Z\",\n \"avg_ns\": 14015528361,\n \"stddev_ns\": 5231950,\n \"avg_ts\": 36.530913,\n \"stddev_ts\": 0.013629,\n \"samples_ns\": [ 14010731214, 14021102751, 14014751120 ],\n \"samples_ts\": [ 36.5434, 36.5164, 36.5329 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:24:12Z\",\n \"avg_ns\": 36790616286,\n \"stddev_ns\": 44945922,\n \"avg_ts\": 13.916606,\n \"stddev_ts\": 0.016997,\n \"samples_ns\": [ 36749177112, 36838394981, 36784276766 ],\n \"samples_ts\": [ 13.9323, 13.8985, 13.919 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:23:16Z", "avg_ns": 14015528361, "stddev_ns": 5231950, "avg_ts": 36.530913, "stddev_ts": 0.013629, "samples_ns": [ 14010731214, 14021102751, 14014751120 ], "samples_ts": [ 36.5434, 36.5164, 36.5329 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_type": "gemma3 1B Q4_K - Medium", "model_size": 799525120, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T01:24:12Z", "avg_ns": 36790616286, "stddev_ns": 44945922, "avg_ts": 13.916606, "stddev_ts": 0.016997, "samples_ns": [ 36749177112, 36838394981, 36784276766 ], "samples_ts": [ 13.9323, 13.8985, 13.919 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 287 }, { "timestamp_utc": "2025-12-09T01:27:24.835294+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:26:11Z\",\n \"avg_ns\": 5502647593,\n \"stddev_ns\": 30616759,\n \"avg_ts\": 23.262011,\n \"stddev_ts\": 0.129724,\n \"samples_ns\": [ 5527656554, 5468503130, 5511783097 ],\n \"samples_ts\": [ 23.1563, 23.4068, 23.223 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:26:33Z\",\n \"avg_ns\": 17171905733,\n \"stddev_ns\": 46536519,\n \"avg_ts\": 7.454072,\n \"stddev_ts\": 0.020228,\n \"samples_ns\": [ 17119051899, 17189936115, 17206729185 ],\n \"samples_ts\": [ 7.47705, 7.44622, 7.43895 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:26:11Z", "avg_ns": 5502647593, "stddev_ns": 30616759, "avg_ts": 23.262011, "stddev_ts": 0.129724, "samples_ns": [ 5527656554, 5468503130, 5511783097 ], "samples_ts": [ 23.1563, 23.4068, 23.223 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T01:26:33Z", "avg_ns": 17171905733, "stddev_ns": 46536519, "avg_ts": 7.454072, "stddev_ts": 0.020228, "samples_ns": [ 17119051899, 17189936115, 17206729185 ], "samples_ts": [ 7.47705, 7.44622, 7.43895 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 288 }, { "timestamp_utc": "2025-12-09T01:31:15.719886+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:27:25Z\",\n \"avg_ns\": 5414467611,\n \"stddev_ns\": 11745444,\n \"avg_ts\": 23.640441,\n \"stddev_ts\": 0.051321,\n \"samples_ns\": [ 5401530816, 5424461566, 5417410452 ],\n \"samples_ts\": [ 23.697, 23.5968, 23.6275 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:27:47Z\",\n \"avg_ns\": 69427419872,\n \"stddev_ns\": 131252701,\n \"avg_ts\": 7.374625,\n \"stddev_ts\": 0.013929,\n \"samples_ns\": [ 69327198760, 69575988182, 69379072676 ],\n \"samples_ts\": [ 7.38527, 7.35886, 7.37975 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:27:25Z", "avg_ns": 5414467611, "stddev_ns": 11745444, "avg_ts": 23.640441, "stddev_ts": 0.051321, "samples_ns": [ 5401530816, 5424461566, 5417410452 ], "samples_ts": [ 23.697, 23.5968, 23.6275 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T01:27:47Z", "avg_ns": 69427419872, "stddev_ns": 131252701, "avg_ts": 7.374625, "stddev_ts": 0.013929, "samples_ns": [ 69327198760, 69575988182, 69379072676 ], "samples_ts": [ 7.38527, 7.35886, 7.37975 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 289 }, { "timestamp_utc": "2025-12-09T01:33:36.920013+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:31:16Z\",\n \"avg_ns\": 21876366473,\n \"stddev_ns\": 25868848,\n \"avg_ts\": 23.404274,\n \"stddev_ts\": 0.027688,\n \"samples_ns\": [ 21897739231, 21883751602, 21847608587 ],\n \"samples_ts\": [ 23.3814, 23.3964, 23.4351 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:32:43Z\",\n \"avg_ns\": 17602951421,\n \"stddev_ns\": 44748078,\n \"avg_ts\": 7.271539,\n \"stddev_ts\": 0.018489,\n \"samples_ns\": [ 17556990511, 17646378181, 17605485573 ],\n \"samples_ts\": [ 7.29054, 7.25361, 7.27046 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:31:16Z", "avg_ns": 21876366473, "stddev_ns": 25868848, "avg_ts": 23.404274, "stddev_ts": 0.027688, "samples_ns": [ 21897739231, 21883751602, 21847608587 ], "samples_ts": [ 23.3814, 23.3964, 23.4351 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T01:32:43Z", "avg_ns": 17602951421, "stddev_ns": 44748078, "avg_ts": 7.271539, "stddev_ts": 0.018489, "samples_ns": [ 17556990511, 17646378181, 17605485573 ], "samples_ts": [ 7.29054, 7.25361, 7.27046 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 290 }, { "timestamp_utc": "2025-12-09T01:38:39.002755+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:33:37Z\",\n \"avg_ns\": 21804867324,\n \"stddev_ns\": 20916046,\n \"avg_ts\": 23.481010,\n \"stddev_ts\": 0.022532,\n \"samples_ns\": [ 21822007448, 21811032300, 21781562225 ],\n \"samples_ts\": [ 23.4626, 23.4744, 23.5061 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:35:04Z\",\n \"avg_ns\": 71335514028,\n \"stddev_ns\": 35352152,\n \"avg_ts\": 7.177352,\n \"stddev_ts\": 0.003556,\n \"samples_ns\": [ 71373769121, 71304055912, 71328717053 ],\n \"samples_ts\": [ 7.1735, 7.18052, 7.17803 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:33:37Z", "avg_ns": 21804867324, "stddev_ns": 20916046, "avg_ts": 23.48101, "stddev_ts": 0.022532, "samples_ns": [ 21822007448, 21811032300, 21781562225 ], "samples_ts": [ 23.4626, 23.4744, 23.5061 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T01:35:04Z", "avg_ns": 71335514028, "stddev_ns": 35352152, "avg_ts": 7.177352, "stddev_ts": 0.003556, "samples_ns": [ 71373769121, 71304055912, 71328717053 ], "samples_ts": [ 7.1735, 7.18052, 7.17803 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 291 }, { "timestamp_utc": "2025-12-09T01:39:53.334969+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:38:39Z\",\n \"avg_ns\": 5419347489,\n \"stddev_ns\": 4115049,\n \"avg_ts\": 23.619089,\n \"stddev_ts\": 0.017942,\n \"samples_ns\": [ 5421414184, 5422019572, 5414608711 ],\n \"samples_ts\": [ 23.6101, 23.6074, 23.6398 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:39:01Z\",\n \"avg_ns\": 17236380555,\n \"stddev_ns\": 38430619,\n \"avg_ts\": 7.426178,\n \"stddev_ts\": 0.016576,\n \"samples_ns\": [ 17251013775, 17265344989, 17192782901 ],\n \"samples_ts\": [ 7.41985, 7.41369, 7.44498 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:38:39Z", "avg_ns": 5419347489, "stddev_ns": 4115049, "avg_ts": 23.619089, "stddev_ts": 0.017942, "samples_ns": [ 5421414184, 5422019572, 5414608711 ], "samples_ts": [ 23.6101, 23.6074, 23.6398 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T01:39:01Z", "avg_ns": 17236380555, "stddev_ns": 38430619, "avg_ts": 7.426178, "stddev_ts": 0.016576, "samples_ns": [ 17251013775, 17265344989, 17192782901 ], "samples_ts": [ 7.41985, 7.41369, 7.44498 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 292 }, { "timestamp_utc": "2025-12-09T01:43:43.786136+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:39:54Z\",\n \"avg_ns\": 5451094383,\n \"stddev_ns\": 1765493,\n \"avg_ts\": 23.481525,\n \"stddev_ts\": 0.007598,\n \"samples_ns\": [ 5451021539, 5452893626, 5449367985 ],\n \"samples_ts\": [ 23.4818, 23.4738, 23.489 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:40:15Z\",\n \"avg_ns\": 69229246374,\n \"stddev_ns\": 191362664,\n \"avg_ts\": 7.395756,\n \"stddev_ts\": 0.020417,\n \"samples_ns\": [ 69444932711, 69162987124, 69079819287 ],\n \"samples_ts\": [ 7.37275, 7.4028, 7.41172 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:39:54Z", "avg_ns": 5451094383, "stddev_ns": 1765493, "avg_ts": 23.481525, "stddev_ts": 0.007598, "samples_ns": [ 5451021539, 5452893626, 5449367985 ], "samples_ts": [ 23.4818, 23.4738, 23.489 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T01:40:15Z", "avg_ns": 69229246374, "stddev_ns": 191362664, "avg_ts": 7.395756, "stddev_ts": 0.020417, "samples_ns": [ 69444932711, 69162987124, 69079819287 ], "samples_ts": [ 7.37275, 7.4028, 7.41172 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 293 }, { "timestamp_utc": "2025-12-09T01:46:04.003212+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:43:44Z\",\n \"avg_ns\": 21936897157,\n \"stddev_ns\": 17428604,\n \"avg_ts\": 23.339683,\n \"stddev_ts\": 0.018551,\n \"samples_ns\": [ 21949525805, 21944152807, 21917012859 ],\n \"samples_ts\": [ 23.3262, 23.332, 23.3608 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:45:12Z\",\n \"avg_ns\": 17160376802,\n \"stddev_ns\": 34987511,\n \"avg_ts\": 7.459064,\n \"stddev_ts\": 0.015190,\n \"samples_ns\": [ 17200703960, 17138111741, 17142314705 ],\n \"samples_ts\": [ 7.44156, 7.46873, 7.4669 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:43:44Z", "avg_ns": 21936897157, "stddev_ns": 17428604, "avg_ts": 23.339683, "stddev_ts": 0.018551, "samples_ns": [ 21949525805, 21944152807, 21917012859 ], "samples_ts": [ 23.3262, 23.332, 23.3608 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T01:45:12Z", "avg_ns": 17160376802, "stddev_ns": 34987511, "avg_ts": 7.459064, "stddev_ts": 0.01519, "samples_ns": [ 17200703960, 17138111741, 17142314705 ], "samples_ts": [ 7.44156, 7.46873, 7.4669 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 294 }, { "timestamp_utc": "2025-12-09T01:51:07.066221+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:46:04Z\",\n \"avg_ns\": 21977696753,\n \"stddev_ns\": 83378060,\n \"avg_ts\": 23.296568,\n \"stddev_ts\": 0.088347,\n \"samples_ns\": [ 21897374334, 21971889379, 22063826547 ],\n \"samples_ts\": [ 23.3818, 23.3025, 23.2054 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:47:32Z\",\n \"avg_ns\": 71418701763,\n \"stddev_ns\": 45983535,\n \"avg_ts\": 7.168993,\n \"stddev_ts\": 0.004615,\n \"samples_ns\": [ 71467558516, 71412280297, 71376266476 ],\n \"samples_ts\": [ 7.16409, 7.16964, 7.17325 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:46:04Z", "avg_ns": 21977696753, "stddev_ns": 83378060, "avg_ts": 23.296568, "stddev_ts": 0.088347, "samples_ns": [ 21897374334, 21971889379, 22063826547 ], "samples_ts": [ 23.3818, 23.3025, 23.2054 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T01:47:32Z", "avg_ns": 71418701763, "stddev_ns": 45983535, "avg_ts": 7.168993, "stddev_ts": 0.004615, "samples_ns": [ 71467558516, 71412280297, 71376266476 ], "samples_ts": [ 7.16409, 7.16964, 7.17325 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 295 }, { "timestamp_utc": "2025-12-09T01:52:21.174242+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:51:07Z\",\n \"avg_ns\": 5441576896,\n \"stddev_ns\": 16956613,\n \"avg_ts\": 23.522745,\n \"stddev_ts\": 0.073212,\n \"samples_ns\": [ 5427400682, 5460360818, 5436969190 ],\n \"samples_ts\": [ 23.584, 23.4417, 23.5425 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:51:29Z\",\n \"avg_ns\": 17154976742,\n \"stddev_ns\": 49931533,\n \"avg_ts\": 7.461434,\n \"stddev_ts\": 0.021705,\n \"samples_ns\": [ 17108628991, 17207850034, 17148451202 ],\n \"samples_ts\": [ 7.4816, 7.43847, 7.46423 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:51:07Z", "avg_ns": 5441576896, "stddev_ns": 16956613, "avg_ts": 23.522745, "stddev_ts": 0.073212, "samples_ns": [ 5427400682, 5460360818, 5436969190 ], "samples_ts": [ 23.584, 23.4417, 23.5425 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T01:51:29Z", "avg_ns": 17154976742, "stddev_ns": 49931533, "avg_ts": 7.461434, "stddev_ts": 0.021705, "samples_ns": [ 17108628991, 17207850034, 17148451202 ], "samples_ts": [ 7.4816, 7.43847, 7.46423 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 296 }, { "timestamp_utc": "2025-12-09T01:56:13.450038+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:52:21Z\",\n \"avg_ns\": 5441612002,\n \"stddev_ns\": 21249385,\n \"avg_ts\": 23.522680,\n \"stddev_ts\": 0.091677,\n \"samples_ns\": [ 5425759684, 5465757086, 5433319238 ],\n \"samples_ts\": [ 23.5912, 23.4185, 23.5583 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:52:43Z\",\n \"avg_ns\": 69867928927,\n \"stddev_ns\": 72515347,\n \"avg_ts\": 7.328117,\n \"stddev_ts\": 0.007605,\n \"samples_ns\": [ 69942035492, 69864632108, 69797119183 ],\n \"samples_ts\": [ 7.32035, 7.32846, 7.33555 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:52:21Z", "avg_ns": 5441612002, "stddev_ns": 21249385, "avg_ts": 23.52268, "stddev_ts": 0.091677, "samples_ns": [ 5425759684, 5465757086, 5433319238 ], "samples_ts": [ 23.5912, 23.4185, 23.5583 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T01:52:43Z", "avg_ns": 69867928927, "stddev_ns": 72515347, "avg_ts": 7.328117, "stddev_ts": 0.007605, "samples_ns": [ 69942035492, 69864632108, 69797119183 ], "samples_ts": [ 7.32035, 7.32846, 7.33555 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 297 }, { "timestamp_utc": "2025-12-09T01:58:37.197744+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:56:14Z\",\n \"avg_ns\": 22349753355,\n \"stddev_ns\": 52951143,\n \"avg_ts\": 22.908616,\n \"stddev_ts\": 0.054338,\n \"samples_ns\": [ 22370040889, 22289658480, 22389560697 ],\n \"samples_ts\": [ 22.8878, 22.9703, 22.8678 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:57:43Z\",\n \"avg_ns\": 17847171829,\n \"stddev_ns\": 66459069,\n \"avg_ts\": 7.172071,\n \"stddev_ts\": 0.026663,\n \"samples_ns\": [ 17824752867, 17794821773, 17921940849 ],\n \"samples_ts\": [ 7.18103, 7.1931, 7.14208 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:56:14Z", "avg_ns": 22349753355, "stddev_ns": 52951143, "avg_ts": 22.908616, "stddev_ts": 0.054338, "samples_ns": [ 22370040889, 22289658480, 22389560697 ], "samples_ts": [ 22.8878, 22.9703, 22.8678 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T01:57:43Z", "avg_ns": 17847171829, "stddev_ns": 66459069, "avg_ts": 7.172071, "stddev_ts": 0.026663, "samples_ns": [ 17824752867, 17794821773, 17921940849 ], "samples_ts": [ 7.18103, 7.1931, 7.14208 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 298 }, { "timestamp_utc": "2025-12-09T02:03:35.292598+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:58:37Z\",\n \"avg_ns\": 22228778688,\n \"stddev_ns\": 38267675,\n \"avg_ts\": 23.033250,\n \"stddev_ts\": 0.039616,\n \"samples_ns\": [ 22202074767, 22272619834, 22211641463 ],\n \"samples_ts\": [ 23.0609, 22.9879, 23.051 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:00:06Z\",\n \"avg_ns\": 69411670427,\n \"stddev_ns\": 177295264,\n \"avg_ts\": 7.376313,\n \"stddev_ts\": 0.018851,\n \"samples_ns\": [ 69575856792, 69435481304, 69223673186 ],\n \"samples_ts\": [ 7.35887, 7.37375, 7.39631 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T01:58:37Z", "avg_ns": 22228778688, "stddev_ns": 38267675, "avg_ts": 23.03325, "stddev_ts": 0.039616, "samples_ns": [ 22202074767, 22272619834, 22211641463 ], "samples_ts": [ 23.0609, 22.9879, 23.051 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T02:00:06Z", "avg_ns": 69411670427, "stddev_ns": 177295264, "avg_ts": 7.376313, "stddev_ts": 0.018851, "samples_ns": [ 69575856792, 69435481304, 69223673186 ], "samples_ts": [ 7.35887, 7.37375, 7.39631 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 299 }, { "timestamp_utc": "2025-12-09T02:04:49.545715+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:03:35Z\",\n \"avg_ns\": 5491879627,\n \"stddev_ns\": 19201642,\n \"avg_ts\": 23.307329,\n \"stddev_ts\": 0.081618,\n \"samples_ns\": [ 5498372323, 5470273657, 5506992903 ],\n \"samples_ts\": [ 23.2796, 23.3992, 23.2432 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:03:57Z\",\n \"avg_ns\": 17135665562,\n \"stddev_ns\": 8133801,\n \"avg_ts\": 7.469801,\n \"stddev_ts\": 0.003547,\n \"samples_ns\": [ 17140703712, 17126281976, 17140010998 ],\n \"samples_ts\": [ 7.4676, 7.47389, 7.46791 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T02:03:35Z", "avg_ns": 5491879627, "stddev_ns": 19201642, "avg_ts": 23.307329, "stddev_ts": 0.081618, "samples_ns": [ 5498372323, 5470273657, 5506992903 ], "samples_ts": [ 23.2796, 23.3992, 23.2432 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T02:03:57Z", "avg_ns": 17135665562, "stddev_ns": 8133801, "avg_ts": 7.469801, "stddev_ts": 0.003547, "samples_ns": [ 17140703712, 17126281976, 17140010998 ], "samples_ts": [ 7.4676, 7.47389, 7.46791 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 300 }, { "timestamp_utc": "2025-12-09T02:08:39.487322+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:04:50Z\",\n \"avg_ns\": 5437534907,\n \"stddev_ns\": 14029882,\n \"avg_ts\": 23.540183,\n \"stddev_ts\": 0.060703,\n \"samples_ns\": [ 5452509448, 5435401136, 5424694137 ],\n \"samples_ts\": [ 23.4754, 23.5493, 23.5958 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:05:11Z\",\n \"avg_ns\": 69108086638,\n \"stddev_ns\": 105934511,\n \"avg_ts\": 7.408696,\n \"stddev_ts\": 0.011349,\n \"samples_ns\": [ 69226767875, 69023090734, 69074401305 ],\n \"samples_ts\": [ 7.39598, 7.41781, 7.4123 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T02:04:50Z", "avg_ns": 5437534907, "stddev_ns": 14029882, "avg_ts": 23.540183, "stddev_ts": 0.060703, "samples_ns": [ 5452509448, 5435401136, 5424694137 ], "samples_ts": [ 23.4754, 23.5493, 23.5958 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T02:05:11Z", "avg_ns": 69108086638, "stddev_ns": 105934511, "avg_ts": 7.408696, "stddev_ts": 0.011349, "samples_ns": [ 69226767875, 69023090734, 69074401305 ], "samples_ts": [ 7.39598, 7.41781, 7.4123 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 301 }, { "timestamp_utc": "2025-12-09T02:10:58.851091+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:08:40Z\",\n \"avg_ns\": 21744075752,\n \"stddev_ns\": 10783467,\n \"avg_ts\": 23.546647,\n \"stddev_ts\": 0.011672,\n \"samples_ns\": [ 21756521900, 21737608264, 21738097094 ],\n \"samples_ts\": [ 23.5332, 23.5536, 23.5531 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:10:07Z\",\n \"avg_ns\": 17173675345,\n \"stddev_ns\": 60354809,\n \"avg_ts\": 7.453329,\n \"stddev_ts\": 0.026247,\n \"samples_ns\": [ 17208124878, 17103985439, 17208915720 ],\n \"samples_ts\": [ 7.43835, 7.48364, 7.438 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T02:08:40Z", "avg_ns": 21744075752, "stddev_ns": 10783467, "avg_ts": 23.546647, "stddev_ts": 0.011672, "samples_ns": [ 21756521900, 21737608264, 21738097094 ], "samples_ts": [ 23.5332, 23.5536, 23.5531 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T02:10:07Z", "avg_ns": 17173675345, "stddev_ns": 60354809, "avg_ts": 7.453329, "stddev_ts": 0.026247, "samples_ns": [ 17208124878, 17103985439, 17208915720 ], "samples_ts": [ 7.43835, 7.48364, 7.438 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 302 }, { "timestamp_utc": "2025-12-09T02:15:54.647282+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:10:59Z\",\n \"avg_ns\": 21876602999,\n \"stddev_ns\": 42455421,\n \"avg_ts\": 23.404058,\n \"stddev_ts\": 0.045369,\n \"samples_ns\": [ 21853143788, 21851054059, 21925611151 ],\n \"samples_ts\": [ 23.4291, 23.4314, 23.3517 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:12:27Z\",\n \"avg_ns\": 69146988154,\n \"stddev_ns\": 59647915,\n \"avg_ts\": 7.404520,\n \"stddev_ts\": 0.006388,\n \"samples_ns\": [ 69204821582, 69085677849, 69150465031 ],\n \"samples_ts\": [ 7.39833, 7.41109, 7.40414 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T02:10:59Z", "avg_ns": 21876602999, "stddev_ns": 42455421, "avg_ts": 23.404058, "stddev_ts": 0.045369, "samples_ns": [ 21853143788, 21851054059, 21925611151 ], "samples_ts": [ 23.4291, 23.4314, 23.3517 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T02:12:27Z", "avg_ns": 69146988154, "stddev_ns": 59647915, "avg_ts": 7.40452, "stddev_ts": 0.006388, "samples_ns": [ 69204821582, 69085677849, 69150465031 ], "samples_ts": [ 7.39833, 7.41109, 7.40414 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 303 }, { "timestamp_utc": "2025-12-09T02:17:08.762228+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:15:55Z\",\n \"avg_ns\": 5441686361,\n \"stddev_ns\": 7417940,\n \"avg_ts\": 23.522149,\n \"stddev_ts\": 0.032071,\n \"samples_ns\": [ 5448711974, 5442416950, 5433930159 ],\n \"samples_ts\": [ 23.4918, 23.519, 23.5557 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:16:17Z\",\n \"avg_ns\": 17145313606,\n \"stddev_ns\": 28792557,\n \"avg_ts\": 7.465611,\n \"stddev_ts\": 0.012545,\n \"samples_ns\": [ 17169121723, 17113312494, 17153506601 ],\n \"samples_ts\": [ 7.45524, 7.47956, 7.46203 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T02:15:55Z", "avg_ns": 5441686361, "stddev_ns": 7417940, "avg_ts": 23.522149, "stddev_ts": 0.032071, "samples_ns": [ 5448711974, 5442416950, 5433930159 ], "samples_ts": [ 23.4918, 23.519, 23.5557 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T02:16:17Z", "avg_ns": 17145313606, "stddev_ns": 28792557, "avg_ts": 7.465611, "stddev_ts": 0.012545, "samples_ns": [ 17169121723, 17113312494, 17153506601 ], "samples_ts": [ 7.45524, 7.47956, 7.46203 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 304 }, { "timestamp_utc": "2025-12-09T02:20:58.966324+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:17:09Z\",\n \"avg_ns\": 5409903228,\n \"stddev_ns\": 17532996,\n \"avg_ts\": 23.660478,\n \"stddev_ts\": 0.076810,\n \"samples_ns\": [ 5417206189, 5389899193, 5422604302 ],\n \"samples_ts\": [ 23.6284, 23.7481, 23.6049 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:17:31Z\",\n \"avg_ns\": 69213787097,\n \"stddev_ns\": 35911697,\n \"avg_ts\": 7.397371,\n \"stddev_ts\": 0.003837,\n \"samples_ns\": [ 69252288920, 69181205062, 69207867311 ],\n \"samples_ts\": [ 7.39326, 7.40085, 7.398 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T02:17:09Z", "avg_ns": 5409903228, "stddev_ns": 17532996, "avg_ts": 23.660478, "stddev_ts": 0.07681, "samples_ns": [ 5417206189, 5389899193, 5422604302 ], "samples_ts": [ 23.6284, 23.7481, 23.6049 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T02:17:31Z", "avg_ns": 69213787097, "stddev_ns": 35911697, "avg_ts": 7.397371, "stddev_ts": 0.003837, "samples_ns": [ 69252288920, 69181205062, 69207867311 ], "samples_ts": [ 7.39326, 7.40085, 7.398 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 305 }, { "timestamp_utc": "2025-12-09T02:23:19.015608+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:20:59Z\",\n \"avg_ns\": 21933955245,\n \"stddev_ns\": 48787110,\n \"avg_ts\": 23.342880,\n \"stddev_ts\": 0.051953,\n \"samples_ns\": [ 21977368114, 21881158130, 21943339493 ],\n \"samples_ts\": [ 23.2967, 23.3991, 23.3328 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:22:27Z\",\n \"avg_ns\": 17152618675,\n \"stddev_ns\": 57581974,\n \"avg_ts\": 7.462473,\n \"stddev_ts\": 0.025022,\n \"samples_ns\": [ 17138174204, 17216047687, 17103634134 ],\n \"samples_ts\": [ 7.46871, 7.43492, 7.48379 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T02:20:59Z", "avg_ns": 21933955245, "stddev_ns": 48787110, "avg_ts": 23.34288, "stddev_ts": 0.051953, "samples_ns": [ 21977368114, 21881158130, 21943339493 ], "samples_ts": [ 23.2967, 23.3991, 23.3328 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T02:22:27Z", "avg_ns": 17152618675, "stddev_ns": 57581974, "avg_ts": 7.462473, "stddev_ts": 0.025022, "samples_ns": [ 17138174204, 17216047687, 17103634134 ], "samples_ts": [ 7.46871, 7.43492, 7.48379 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 306 }, { "timestamp_utc": "2025-12-09T02:28:15.970939+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:23:19Z\",\n \"avg_ns\": 21912516313,\n \"stddev_ns\": 52541022,\n \"avg_ts\": 23.365731,\n \"stddev_ts\": 0.056096,\n \"samples_ns\": [ 21949815515, 21852428899, 21935304527 ],\n \"samples_ts\": [ 23.3259, 23.4299, 23.3414 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:24:47Z\",\n \"avg_ns\": 69480181418,\n \"stddev_ns\": 51536861,\n \"avg_ts\": 7.369011,\n \"stddev_ts\": 0.005464,\n \"samples_ns\": [ 69438988856, 69463585110, 69537970290 ],\n \"samples_ts\": [ 7.37338, 7.37077, 7.36288 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T02:23:19Z", "avg_ns": 21912516313, "stddev_ns": 52541022, "avg_ts": 23.365731, "stddev_ts": 0.056096, "samples_ns": [ 21949815515, 21852428899, 21935304527 ], "samples_ts": [ 23.3259, 23.4299, 23.3414 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T02:24:47Z", "avg_ns": 69480181418, "stddev_ns": 51536861, "avg_ts": 7.369011, "stddev_ts": 0.005464, "samples_ns": [ 69438988856, 69463585110, 69537970290 ], "samples_ts": [ 7.37338, 7.37077, 7.36288 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 307 }, { "timestamp_utc": "2025-12-09T02:29:29.861030+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:28:16Z\",\n \"avg_ns\": 5417626714,\n \"stddev_ns\": 18001373,\n \"avg_ts\": 23.626755,\n \"stddev_ts\": 0.078364,\n \"samples_ns\": [ 5409475588, 5405143052, 5438261504 ],\n \"samples_ts\": [ 23.6622, 23.6811, 23.5369 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:28:38Z\",\n \"avg_ns\": 17116637624,\n \"stddev_ns\": 49580069,\n \"avg_ts\": 7.478146,\n \"stddev_ts\": 0.021681,\n \"samples_ns\": [ 17062526125, 17127503616, 17159883133 ],\n \"samples_ts\": [ 7.50182, 7.47336, 7.45926 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T02:28:16Z", "avg_ns": 5417626714, "stddev_ns": 18001373, "avg_ts": 23.626755, "stddev_ts": 0.078364, "samples_ns": [ 5409475588, 5405143052, 5438261504 ], "samples_ts": [ 23.6622, 23.6811, 23.5369 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T02:28:38Z", "avg_ns": 17116637624, "stddev_ns": 49580069, "avg_ts": 7.478146, "stddev_ts": 0.021681, "samples_ns": [ 17062526125, 17127503616, 17159883133 ], "samples_ts": [ 7.50182, 7.47336, 7.45926 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 308 }, { "timestamp_utc": "2025-12-09T02:33:19.934164+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:29:30Z\",\n \"avg_ns\": 5435340276,\n \"stddev_ns\": 4925420,\n \"avg_ts\": 23.549596,\n \"stddev_ts\": 0.021349,\n \"samples_ns\": [ 5436973451, 5429805710, 5439241667 ],\n \"samples_ts\": [ 23.5425, 23.5736, 23.5327 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:29:52Z\",\n \"avg_ns\": 69147722330,\n \"stddev_ns\": 91690718,\n \"avg_ts\": 7.404446,\n \"stddev_ts\": 0.009824,\n \"samples_ns\": [ 69220457468, 69044727069, 69177982455 ],\n \"samples_ts\": [ 7.39666, 7.41548, 7.4012 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T02:29:30Z", "avg_ns": 5435340276, "stddev_ns": 4925420, "avg_ts": 23.549596, "stddev_ts": 0.021349, "samples_ns": [ 5436973451, 5429805710, 5439241667 ], "samples_ts": [ 23.5425, 23.5736, 23.5327 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T02:29:52Z", "avg_ns": 69147722330, "stddev_ns": 91690718, "avg_ts": 7.404446, "stddev_ts": 0.009824, "samples_ns": [ 69220457468, 69044727069, 69177982455 ], "samples_ts": [ 7.39666, 7.41548, 7.4012 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 309 }, { "timestamp_utc": "2025-12-09T02:35:41.187096+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:33:20Z\",\n \"avg_ns\": 22218491213,\n \"stddev_ns\": 40379238,\n \"avg_ts\": 23.043920,\n \"stddev_ts\": 0.041842,\n \"samples_ns\": [ 22187850199, 22203376797, 22264246645 ],\n \"samples_ts\": [ 23.0757, 23.0596, 22.9965 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:34:49Z\",\n \"avg_ns\": 17120812830,\n \"stddev_ns\": 35767243,\n \"avg_ts\": 7.476302,\n \"stddev_ts\": 0.015602,\n \"samples_ns\": [ 17161686854, 17095250952, 17105500685 ],\n \"samples_ts\": [ 7.45847, 7.48746, 7.48297 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T02:33:20Z", "avg_ns": 22218491213, "stddev_ns": 40379238, "avg_ts": 23.04392, "stddev_ts": 0.041842, "samples_ns": [ 22187850199, 22203376797, 22264246645 ], "samples_ts": [ 23.0757, 23.0596, 22.9965 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T02:34:49Z", "avg_ns": 17120812830, "stddev_ns": 35767243, "avg_ts": 7.476302, "stddev_ts": 0.015602, "samples_ns": [ 17161686854, 17095250952, 17105500685 ], "samples_ts": [ 7.45847, 7.48746, 7.48297 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 310 }, { "timestamp_utc": "2025-12-09T02:40:38.103212+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:35:41Z\",\n \"avg_ns\": 22167688827,\n \"stddev_ns\": 53458761,\n \"avg_ts\": 23.096769,\n \"stddev_ts\": 0.055749,\n \"samples_ns\": [ 22182025170, 22108523839, 22212517473 ],\n \"samples_ts\": [ 23.0818, 23.1585, 23.0501 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:37:10Z\",\n \"avg_ns\": 69102126000,\n \"stddev_ns\": 129544186,\n \"avg_ts\": 7.409341,\n \"stddev_ts\": 0.013905,\n \"samples_ns\": [ 69179220556, 69174591696, 68952565750 ],\n \"samples_ts\": [ 7.40107, 7.40156, 7.42539 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T02:35:41Z", "avg_ns": 22167688827, "stddev_ns": 53458761, "avg_ts": 23.096769, "stddev_ts": 0.055749, "samples_ns": [ 22182025170, 22108523839, 22212517473 ], "samples_ts": [ 23.0818, 23.1585, 23.0501 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T02:37:10Z", "avg_ns": 69102126000, "stddev_ns": 129544186, "avg_ts": 7.409341, "stddev_ts": 0.013905, "samples_ns": [ 69179220556, 69174591696, 68952565750 ], "samples_ts": [ 7.40107, 7.40156, 7.42539 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 311 }, { "timestamp_utc": "2025-12-09T02:41:51.997942+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:40:38Z\",\n \"avg_ns\": 5422209357,\n \"stddev_ns\": 3420346,\n \"avg_ts\": 23.606620,\n \"stddev_ts\": 0.014894,\n \"samples_ns\": [ 5418524964, 5422819602, 5425283505 ],\n \"samples_ts\": [ 23.6227, 23.604, 23.5932 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:41:00Z\",\n \"avg_ns\": 17107041354,\n \"stddev_ns\": 37088714,\n \"avg_ts\": 7.482323,\n \"stddev_ts\": 0.016202,\n \"samples_ns\": [ 17088953727, 17082466705, 17149703630 ],\n \"samples_ts\": [ 7.49022, 7.49306, 7.46369 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T02:40:38Z", "avg_ns": 5422209357, "stddev_ns": 3420346, "avg_ts": 23.60662, "stddev_ts": 0.014894, "samples_ns": [ 5418524964, 5422819602, 5425283505 ], "samples_ts": [ 23.6227, 23.604, 23.5932 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T02:41:00Z", "avg_ns": 17107041354, "stddev_ns": 37088714, "avg_ts": 7.482323, "stddev_ts": 0.016202, "samples_ns": [ 17088953727, 17082466705, 17149703630 ], "samples_ts": [ 7.49022, 7.49306, 7.46369 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 312 }, { "timestamp_utc": "2025-12-09T02:45:42.423008+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:41:52Z\",\n \"avg_ns\": 5437426252,\n \"stddev_ns\": 7546827,\n \"avg_ts\": 23.540579,\n \"stddev_ts\": 0.032647,\n \"samples_ns\": [ 5431814495, 5434459363, 5446004900 ],\n \"samples_ts\": [ 23.5649, 23.5534, 23.5035 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:42:14Z\",\n \"avg_ns\": 69245291274,\n \"stddev_ns\": 129772115,\n \"avg_ts\": 7.394022,\n \"stddev_ts\": 0.013845,\n \"samples_ns\": [ 69201885248, 69142786393, 69391202183 ],\n \"samples_ts\": [ 7.39864, 7.40497, 7.37846 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T02:41:52Z", "avg_ns": 5437426252, "stddev_ns": 7546827, "avg_ts": 23.540579, "stddev_ts": 0.032647, "samples_ns": [ 5431814495, 5434459363, 5446004900 ], "samples_ts": [ 23.5649, 23.5534, 23.5035 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T02:42:14Z", "avg_ns": 69245291274, "stddev_ns": 129772115, "avg_ts": 7.394022, "stddev_ts": 0.013845, "samples_ns": [ 69201885248, 69142786393, 69391202183 ], "samples_ts": [ 7.39864, 7.40497, 7.37846 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 313 }, { "timestamp_utc": "2025-12-09T02:48:02.466907+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:45:43Z\",\n \"avg_ns\": 21918671330,\n \"stddev_ns\": 40848060,\n \"avg_ts\": 23.359134,\n \"stddev_ts\": 0.043576,\n \"samples_ns\": [ 21946713309, 21871805589, 21937495093 ],\n \"samples_ts\": [ 23.3292, 23.4091, 23.339 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:47:10Z\",\n \"avg_ns\": 17157168665,\n \"stddev_ns\": 73028297,\n \"avg_ts\": 7.460528,\n \"stddev_ts\": 0.031681,\n \"samples_ns\": [ 17122539125, 17107897316, 17241069555 ],\n \"samples_ts\": [ 7.47553, 7.48192, 7.42413 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T02:45:43Z", "avg_ns": 21918671330, "stddev_ns": 40848060, "avg_ts": 23.359134, "stddev_ts": 0.043576, "samples_ns": [ 21946713309, 21871805589, 21937495093 ], "samples_ts": [ 23.3292, 23.4091, 23.339 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T02:47:10Z", "avg_ns": 17157168665, "stddev_ns": 73028297, "avg_ts": 7.460528, "stddev_ts": 0.031681, "samples_ns": [ 17122539125, 17107897316, 17241069555 ], "samples_ts": [ 7.47553, 7.48192, 7.42413 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 314 }, { "timestamp_utc": "2025-12-09T02:52:58.120240+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:48:03Z\",\n \"avg_ns\": 21789629015,\n \"stddev_ns\": 34451824,\n \"avg_ts\": 23.497456,\n \"stddev_ts\": 0.037136,\n \"samples_ns\": [ 21758917734, 21783086727, 21826882585 ],\n \"samples_ts\": [ 23.5306, 23.5045, 23.4573 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:49:30Z\",\n \"avg_ns\": 69171232961,\n \"stddev_ns\": 62907730,\n \"avg_ts\": 7.401925,\n \"stddev_ts\": 0.006728,\n \"samples_ns\": [ 69130352525, 69243671609, 69139674751 ],\n \"samples_ts\": [ 7.4063, 7.39418, 7.4053 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T02:48:03Z", "avg_ns": 21789629015, "stddev_ns": 34451824, "avg_ts": 23.497456, "stddev_ts": 0.037136, "samples_ns": [ 21758917734, 21783086727, 21826882585 ], "samples_ts": [ 23.5306, 23.5045, 23.4573 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T02:49:30Z", "avg_ns": 69171232961, "stddev_ns": 62907730, "avg_ts": 7.401925, "stddev_ts": 0.006728, "samples_ns": [ 69130352525, 69243671609, 69139674751 ], "samples_ts": [ 7.4063, 7.39418, 7.4053 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 315 }, { "timestamp_utc": "2025-12-09T02:54:12.111719+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:52:58Z\",\n \"avg_ns\": 5418292998,\n \"stddev_ns\": 32443181,\n \"avg_ts\": 23.624240,\n \"stddev_ts\": 0.141137,\n \"samples_ns\": [ 5409486415, 5391162463, 5454230118 ],\n \"samples_ts\": [ 23.6621, 23.7426, 23.468 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:53:20Z\",\n \"avg_ns\": 17133879458,\n \"stddev_ns\": 38715297,\n \"avg_ts\": 7.470604,\n \"stddev_ts\": 0.016883,\n \"samples_ns\": [ 17171546907, 17094195974, 17135895495 ],\n \"samples_ts\": [ 7.45419, 7.48792, 7.4697 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T02:52:58Z", "avg_ns": 5418292998, "stddev_ns": 32443181, "avg_ts": 23.62424, "stddev_ts": 0.141137, "samples_ns": [ 5409486415, 5391162463, 5454230118 ], "samples_ts": [ 23.6621, 23.7426, 23.468 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T02:53:20Z", "avg_ns": 17133879458, "stddev_ns": 38715297, "avg_ts": 7.470604, "stddev_ts": 0.016883, "samples_ns": [ 17171546907, 17094195974, 17135895495 ], "samples_ts": [ 7.45419, 7.48792, 7.4697 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 316 }, { "timestamp_utc": "2025-12-09T02:58:03.221713+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:54:12Z\",\n \"avg_ns\": 5422059170,\n \"stddev_ns\": 6608069,\n \"avg_ts\": 23.607291,\n \"stddev_ts\": 0.028788,\n \"samples_ns\": [ 5425877274, 5425870460, 5414429778 ],\n \"samples_ts\": [ 23.5907, 23.5907, 23.6405 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:54:34Z\",\n \"avg_ns\": 69495469031,\n \"stddev_ns\": 88338230,\n \"avg_ts\": 7.367395,\n \"stddev_ts\": 0.009369,\n \"samples_ns\": [ 69572867839, 69514307202, 69399232054 ],\n \"samples_ts\": [ 7.35919, 7.36539, 7.3776 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T02:54:12Z", "avg_ns": 5422059170, "stddev_ns": 6608069, "avg_ts": 23.607291, "stddev_ts": 0.028788, "samples_ns": [ 5425877274, 5425870460, 5414429778 ], "samples_ts": [ 23.5907, 23.5907, 23.6405 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T02:54:34Z", "avg_ns": 69495469031, "stddev_ns": 88338230, "avg_ts": 7.367395, "stddev_ts": 0.009369, "samples_ns": [ 69572867839, 69514307202, 69399232054 ], "samples_ts": [ 7.35919, 7.36539, 7.3776 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 317 }, { "timestamp_utc": "2025-12-09T03:00:25.152017+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:58:03Z\",\n \"avg_ns\": 22032544094,\n \"stddev_ns\": 8793195,\n \"avg_ts\": 23.238354,\n \"stddev_ts\": 0.009275,\n \"samples_ns\": [ 22036466653, 22022473659, 22038691971 ],\n \"samples_ts\": [ 23.2342, 23.249, 23.2319 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:59:32Z\",\n \"avg_ns\": 17621411430,\n \"stddev_ns\": 50760253,\n \"avg_ts\": 7.263931,\n \"stddev_ts\": 0.020954,\n \"samples_ns\": [ 17563712487, 17659187943, 17641333862 ],\n \"samples_ts\": [ 7.28775, 7.24835, 7.25569 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T02:58:03Z", "avg_ns": 22032544094, "stddev_ns": 8793195, "avg_ts": 23.238354, "stddev_ts": 0.009275, "samples_ns": [ 22036466653, 22022473659, 22038691971 ], "samples_ts": [ 23.2342, 23.249, 23.2319 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T02:59:32Z", "avg_ns": 17621411430, "stddev_ns": 50760253, "avg_ts": 7.263931, "stddev_ts": 0.020954, "samples_ns": [ 17563712487, 17659187943, 17641333862 ], "samples_ts": [ 7.28775, 7.24835, 7.25569 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 318 }, { "timestamp_utc": "2025-12-09T03:05:27.827586+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:00:25Z\",\n \"avg_ns\": 21946390380,\n \"stddev_ns\": 54194714,\n \"avg_ts\": 23.329672,\n \"stddev_ts\": 0.057553,\n \"samples_ns\": [ 22006760391, 21930474908, 21901935843 ],\n \"samples_ts\": [ 23.2656, 23.3465, 23.3769 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:01:53Z\",\n \"avg_ns\": 71326476398,\n \"stddev_ns\": 132350628,\n \"avg_ts\": 7.178277,\n \"stddev_ts\": 0.013314,\n \"samples_ns\": [ 71304952006, 71468269942, 71206207246 ],\n \"samples_ts\": [ 7.18043, 7.16402, 7.19038 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:00:25Z", "avg_ns": 21946390380, "stddev_ns": 54194714, "avg_ts": 23.329672, "stddev_ts": 0.057553, "samples_ns": [ 22006760391, 21930474908, 21901935843 ], "samples_ts": [ 23.2656, 23.3465, 23.3769 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T03:01:53Z", "avg_ns": 71326476398, "stddev_ns": 132350628, "avg_ts": 7.178277, "stddev_ts": 0.013314, "samples_ns": [ 71304952006, 71468269942, 71206207246 ], "samples_ts": [ 7.18043, 7.16402, 7.19038 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 319 }, { "timestamp_utc": "2025-12-09T03:06:41.797796+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:05:28Z\",\n \"avg_ns\": 5440231550,\n \"stddev_ns\": 6727269,\n \"avg_ts\": 23.528434,\n \"stddev_ts\": 0.029112,\n \"samples_ns\": [ 5444005357, 5432465527, 5444223768 ],\n \"samples_ts\": [ 23.5121, 23.562, 23.5112 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:05:50Z\",\n \"avg_ns\": 17100702768,\n \"stddev_ns\": 4059258,\n \"avg_ts\": 7.485073,\n \"stddev_ts\": 0.001776,\n \"samples_ns\": [ 17105380881, 17098580337, 17098147087 ],\n \"samples_ts\": [ 7.48303, 7.486, 7.48619 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:05:28Z", "avg_ns": 5440231550, "stddev_ns": 6727269, "avg_ts": 23.528434, "stddev_ts": 0.029112, "samples_ns": [ 5444005357, 5432465527, 5444223768 ], "samples_ts": [ 23.5121, 23.562, 23.5112 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T03:05:50Z", "avg_ns": 17100702768, "stddev_ns": 4059258, "avg_ts": 7.485073, "stddev_ts": 0.001776, "samples_ns": [ 17105380881, 17098580337, 17098147087 ], "samples_ts": [ 7.48303, 7.486, 7.48619 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 320 }, { "timestamp_utc": "2025-12-09T03:10:32.106218+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:06:42Z\",\n \"avg_ns\": 5424553836,\n \"stddev_ns\": 25314989,\n \"avg_ts\": 23.596752,\n \"stddev_ts\": 0.109831,\n \"samples_ns\": [ 5411853817, 5408103070, 5453704622 ],\n \"samples_ts\": [ 23.6518, 23.6682, 23.4703 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:07:04Z\",\n \"avg_ns\": 69232992955,\n \"stddev_ns\": 62770445,\n \"avg_ts\": 7.395322,\n \"stddev_ts\": 0.006709,\n \"samples_ns\": [ 69269628426, 69268837111, 69160513328 ],\n \"samples_ts\": [ 7.39141, 7.39149, 7.40307 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:06:42Z", "avg_ns": 5424553836, "stddev_ns": 25314989, "avg_ts": 23.596752, "stddev_ts": 0.109831, "samples_ns": [ 5411853817, 5408103070, 5453704622 ], "samples_ts": [ 23.6518, 23.6682, 23.4703 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T03:07:04Z", "avg_ns": 69232992955, "stddev_ns": 62770445, "avg_ts": 7.395322, "stddev_ts": 0.006709, "samples_ns": [ 69269628426, 69268837111, 69160513328 ], "samples_ts": [ 7.39141, 7.39149, 7.40307 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 321 }, { "timestamp_utc": "2025-12-09T03:12:54.764031+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:10:32Z\",\n \"avg_ns\": 22212005623,\n \"stddev_ns\": 13068531,\n \"avg_ts\": 23.050603,\n \"stddev_ts\": 0.013564,\n \"samples_ns\": [ 22223705446, 22197902099, 22214409324 ],\n \"samples_ts\": [ 23.0385, 23.0652, 23.0481 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:12:01Z\",\n \"avg_ns\": 17637641073,\n \"stddev_ns\": 42106913,\n \"avg_ts\": 7.257234,\n \"stddev_ts\": 0.017329,\n \"samples_ns\": [ 17678349438, 17640311094, 17594262687 ],\n \"samples_ts\": [ 7.24049, 7.25611, 7.2751 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:10:32Z", "avg_ns": 22212005623, "stddev_ns": 13068531, "avg_ts": 23.050603, "stddev_ts": 0.013564, "samples_ns": [ 22223705446, 22197902099, 22214409324 ], "samples_ts": [ 23.0385, 23.0652, 23.0481 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T03:12:01Z", "avg_ns": 17637641073, "stddev_ns": 42106913, "avg_ts": 7.257234, "stddev_ts": 0.017329, "samples_ns": [ 17678349438, 17640311094, 17594262687 ], "samples_ts": [ 7.24049, 7.25611, 7.2751 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 322 }, { "timestamp_utc": "2025-12-09T03:17:52.970046+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:12:55Z\",\n \"avg_ns\": 22302675232,\n \"stddev_ns\": 63369355,\n \"avg_ts\": 22.957011,\n \"stddev_ts\": 0.065291,\n \"samples_ns\": [ 22357114866, 22233112794, 22317798037 ],\n \"samples_ts\": [ 22.901, 23.0287, 22.9413 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:14:24Z\",\n \"avg_ns\": 69328700408,\n \"stddev_ns\": 153147673,\n \"avg_ts\": 7.385133,\n \"stddev_ts\": 0.016293,\n \"samples_ns\": [ 69233085405, 69505339075, 69247676745 ],\n \"samples_ts\": [ 7.39531, 7.36634, 7.39375 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:12:55Z", "avg_ns": 22302675232, "stddev_ns": 63369355, "avg_ts": 22.957011, "stddev_ts": 0.065291, "samples_ns": [ 22357114866, 22233112794, 22317798037 ], "samples_ts": [ 22.901, 23.0287, 22.9413 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T03:14:24Z", "avg_ns": 69328700408, "stddev_ns": 153147673, "avg_ts": 7.385133, "stddev_ts": 0.016293, "samples_ns": [ 69233085405, 69505339075, 69247676745 ], "samples_ts": [ 7.39531, 7.36634, 7.39375 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 323 }, { "timestamp_utc": "2025-12-09T03:18:34.233436+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:17:53Z\",\n \"avg_ns\": 2799402321,\n \"stddev_ns\": 4720762,\n \"avg_ts\": 45.724132,\n \"stddev_ts\": 0.077102,\n \"samples_ns\": [ 2799522824, 2804061086, 2794623055 ],\n \"samples_ts\": [ 45.7221, 45.6481, 45.8022 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:18:04Z\",\n \"avg_ns\": 9741254081,\n \"stddev_ns\": 19415595,\n \"avg_ts\": 13.140027,\n \"stddev_ts\": 0.026216,\n \"samples_ns\": [ 9719129690, 9755453007, 9749179547 ],\n \"samples_ts\": [ 13.1699, 13.1209, 13.1293 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:17:53Z", "avg_ns": 2799402321, "stddev_ns": 4720762, "avg_ts": 45.724132, "stddev_ts": 0.077102, "samples_ns": [ 2799522824, 2804061086, 2794623055 ], "samples_ts": [ 45.7221, 45.6481, 45.8022 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T03:18:04Z", "avg_ns": 9741254081, "stddev_ns": 19415595, "avg_ts": 13.140027, "stddev_ts": 0.026216, "samples_ns": [ 9719129690, 9755453007, 9749179547 ], "samples_ts": [ 13.1699, 13.1209, 13.1293 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 324 }, { "timestamp_utc": "2025-12-09T03:20:39.345570+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:18:34Z\",\n \"avg_ns\": 2781984056,\n \"stddev_ns\": 7356090,\n \"avg_ts\": 46.010543,\n \"stddev_ts\": 0.121503,\n \"samples_ns\": [ 2790317317, 2776393745, 2779241107 ],\n \"samples_ts\": [ 45.8729, 46.103, 46.0557 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:18:46Z\",\n \"avg_ns\": 37710687109,\n \"stddev_ns\": 57104913,\n \"avg_ts\": 13.577074,\n \"stddev_ts\": 0.020543,\n \"samples_ns\": [ 37670312062, 37685726401, 37776022864 ],\n \"samples_ts\": [ 13.5916, 13.586, 13.5536 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:18:34Z", "avg_ns": 2781984056, "stddev_ns": 7356090, "avg_ts": 46.010543, "stddev_ts": 0.121503, "samples_ns": [ 2790317317, 2776393745, 2779241107 ], "samples_ts": [ 45.8729, 46.103, 46.0557 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T03:18:46Z", "avg_ns": 37710687109, "stddev_ns": 57104913, "avg_ts": 13.577074, "stddev_ts": 0.020543, "samples_ns": [ 37670312062, 37685726401, 37776022864 ], "samples_ts": [ 13.5916, 13.586, 13.5536 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 325 }, { "timestamp_utc": "2025-12-09T03:21:52.676225+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:20:40Z\",\n \"avg_ns\": 11165829052,\n \"stddev_ns\": 12675397,\n \"avg_ts\": 45.854225,\n \"stddev_ts\": 0.052069,\n \"samples_ns\": [ 11177228330, 11152179064, 11168079762 ],\n \"samples_ts\": [ 45.8074, 45.9103, 45.8449 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:21:24Z\",\n \"avg_ns\": 9286262939,\n \"stddev_ns\": 19600070,\n \"avg_ts\": 13.783842,\n \"stddev_ts\": 0.029067,\n \"samples_ns\": [ 9308126657, 9280394368, 9270267793 ],\n \"samples_ts\": [ 13.7514, 13.7925, 13.8076 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:20:40Z", "avg_ns": 11165829052, "stddev_ns": 12675397, "avg_ts": 45.854225, "stddev_ts": 0.052069, "samples_ns": [ 11177228330, 11152179064, 11168079762 ], "samples_ts": [ 45.8074, 45.9103, 45.8449 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T03:21:24Z", "avg_ns": 9286262939, "stddev_ns": 19600070, "avg_ts": 13.783842, "stddev_ts": 0.029067, "samples_ns": [ 9308126657, 9280394368, 9270267793 ], "samples_ts": [ 13.7514, 13.7925, 13.8076 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 326 }, { "timestamp_utc": "2025-12-09T03:24:31.237846+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:21:53Z\",\n \"avg_ns\": 11193996950,\n \"stddev_ns\": 23432538,\n \"avg_ts\": 45.738935,\n \"stddev_ts\": 0.095647,\n \"samples_ns\": [ 11185031572, 11220588216, 11176371063 ],\n \"samples_ts\": [ 45.7755, 45.6304, 45.8109 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:22:38Z\",\n \"avg_ns\": 37635494890,\n \"stddev_ns\": 40573775,\n \"avg_ts\": 13.604189,\n \"stddev_ts\": 0.014659,\n \"samples_ns\": [ 37603807759, 37681223240, 37621453673 ],\n \"samples_ts\": [ 13.6156, 13.5877, 13.6093 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:21:53Z", "avg_ns": 11193996950, "stddev_ns": 23432538, "avg_ts": 45.738935, "stddev_ts": 0.095647, "samples_ns": [ 11185031572, 11220588216, 11176371063 ], "samples_ts": [ 45.7755, 45.6304, 45.8109 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T03:22:38Z", "avg_ns": 37635494890, "stddev_ns": 40573775, "avg_ts": 13.604189, "stddev_ts": 0.014659, "samples_ns": [ 37603807759, 37681223240, 37621453673 ], "samples_ts": [ 13.6156, 13.5877, 13.6093 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 327 }, { "timestamp_utc": "2025-12-09T03:25:12.467464+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:24:31Z\",\n \"avg_ns\": 2782535225,\n \"stddev_ns\": 8212261,\n \"avg_ts\": 46.001482,\n \"stddev_ts\": 0.135851,\n \"samples_ns\": [ 2790102586, 2773802553, 2783700536 ],\n \"samples_ts\": [ 45.8764, 46.146, 45.982 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:24:43Z\",\n \"avg_ns\": 9759087755,\n \"stddev_ns\": 20287706,\n \"avg_ts\": 13.116018,\n \"stddev_ts\": 0.027268,\n \"samples_ns\": [ 9738542257, 9779106446, 9759614564 ],\n \"samples_ts\": [ 13.1437, 13.0891, 13.1153 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:24:31Z", "avg_ns": 2782535225, "stddev_ns": 8212261, "avg_ts": 46.001482, "stddev_ts": 0.135851, "samples_ns": [ 2790102586, 2773802553, 2783700536 ], "samples_ts": [ 45.8764, 46.146, 45.982 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T03:24:43Z", "avg_ns": 9759087755, "stddev_ns": 20287706, "avg_ts": 13.116018, "stddev_ts": 0.027268, "samples_ns": [ 9738542257, 9779106446, 9759614564 ], "samples_ts": [ 13.1437, 13.0891, 13.1153 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 328 }, { "timestamp_utc": "2025-12-09T03:27:17.442412+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:25:13Z\",\n \"avg_ns\": 2781843432,\n \"stddev_ns\": 14191010,\n \"avg_ts\": 46.013451,\n \"stddev_ts\": 0.234188,\n \"samples_ns\": [ 2797814582, 2777031342, 2770684374 ],\n \"samples_ts\": [ 45.75, 46.0924, 46.198 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:25:24Z\",\n \"avg_ns\": 37667884816,\n \"stddev_ns\": 41419518,\n \"avg_ts\": 13.592492,\n \"stddev_ts\": 0.014945,\n \"samples_ns\": [ 37666116484, 37710159725, 37627378240 ],\n \"samples_ts\": [ 13.5931, 13.5772, 13.6071 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:25:13Z", "avg_ns": 2781843432, "stddev_ns": 14191010, "avg_ts": 46.013451, "stddev_ts": 0.234188, "samples_ns": [ 2797814582, 2777031342, 2770684374 ], "samples_ts": [ 45.75, 46.0924, 46.198 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T03:25:24Z", "avg_ns": 37667884816, "stddev_ns": 41419518, "avg_ts": 13.592492, "stddev_ts": 0.014945, "samples_ns": [ 37666116484, 37710159725, 37627378240 ], "samples_ts": [ 13.5931, 13.5772, 13.6071 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 329 }, { "timestamp_utc": "2025-12-09T03:28:31.707636+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:27:18Z\",\n \"avg_ns\": 11383241811,\n \"stddev_ns\": 234742889,\n \"avg_ts\": 44.991005,\n \"stddev_ts\": 0.916882,\n \"samples_ns\": [ 11245835772, 11654290817, 11249598845 ],\n \"samples_ts\": [ 45.528, 43.9323, 45.5127 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:28:03Z\",\n \"avg_ns\": 9323097777,\n \"stddev_ns\": 22754685,\n \"avg_ts\": 13.729397,\n \"stddev_ts\": 0.033510,\n \"samples_ns\": [ 9323341205, 9345729363, 9300222765 ],\n \"samples_ts\": [ 13.729, 13.6961, 13.7631 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:27:18Z", "avg_ns": 11383241811, "stddev_ns": 234742889, "avg_ts": 44.991005, "stddev_ts": 0.916882, "samples_ns": [ 11245835772, 11654290817, 11249598845 ], "samples_ts": [ 45.528, 43.9323, 45.5127 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T03:28:03Z", "avg_ns": 9323097777, "stddev_ns": 22754685, "avg_ts": 13.729397, "stddev_ts": 0.03351, "samples_ns": [ 9323341205, 9345729363, 9300222765 ], "samples_ts": [ 13.729, 13.6961, 13.7631 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 330 }, { "timestamp_utc": "2025-12-09T03:31:10.793950+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:28:32Z\",\n \"avg_ns\": 11245195136,\n \"stddev_ns\": 24575153,\n \"avg_ts\": 45.530702,\n \"stddev_ts\": 0.099559,\n \"samples_ns\": [ 11267301288, 11249550716, 11218733404 ],\n \"samples_ts\": [ 45.4412, 45.5129, 45.638 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:29:17Z\",\n \"avg_ns\": 37746847080,\n \"stddev_ns\": 47955863,\n \"avg_ts\": 13.564061,\n \"stddev_ts\": 0.017245,\n \"samples_ns\": [ 37691688061, 37770198495, 37778654684 ],\n \"samples_ts\": [ 13.5839, 13.5557, 13.5526 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:28:32Z", "avg_ns": 11245195136, "stddev_ns": 24575153, "avg_ts": 45.530702, "stddev_ts": 0.099559, "samples_ns": [ 11267301288, 11249550716, 11218733404 ], "samples_ts": [ 45.4412, 45.5129, 45.638 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T03:29:17Z", "avg_ns": 37746847080, "stddev_ns": 47955863, "avg_ts": 13.564061, "stddev_ts": 0.017245, "samples_ns": [ 37691688061, 37770198495, 37778654684 ], "samples_ts": [ 13.5839, 13.5557, 13.5526 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 331 }, { "timestamp_utc": "2025-12-09T03:31:52.241179+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:31:11Z\",\n \"avg_ns\": 2790173062,\n \"stddev_ns\": 7234749,\n \"avg_ts\": 45.875496,\n \"stddev_ts\": 0.118838,\n \"samples_ns\": [ 2784014958, 2798140298, 2788363932 ],\n \"samples_ts\": [ 45.9768, 45.7447, 45.9051 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:31:22Z\",\n \"avg_ns\": 9807217323,\n \"stddev_ns\": 39545647,\n \"avg_ts\": 13.051754,\n \"stddev_ts\": 0.052623,\n \"samples_ns\": [ 9806475037, 9768048168, 9847128765 ],\n \"samples_ts\": [ 13.0526, 13.1039, 12.9987 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:31:11Z", "avg_ns": 2790173062, "stddev_ns": 7234749, "avg_ts": 45.875496, "stddev_ts": 0.118838, "samples_ns": [ 2784014958, 2798140298, 2788363932 ], "samples_ts": [ 45.9768, 45.7447, 45.9051 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T03:31:22Z", "avg_ns": 9807217323, "stddev_ns": 39545647, "avg_ts": 13.051754, "stddev_ts": 0.052623, "samples_ns": [ 9806475037, 9768048168, 9847128765 ], "samples_ts": [ 13.0526, 13.1039, 12.9987 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 332 }, { "timestamp_utc": "2025-12-09T03:33:57.144018+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:31:52Z\",\n \"avg_ns\": 2791588048,\n \"stddev_ns\": 10173851,\n \"avg_ts\": 45.852443,\n \"stddev_ts\": 0.166815,\n \"samples_ns\": [ 2783790450, 2803096037, 2787877659 ],\n \"samples_ts\": [ 45.9805, 45.6638, 45.9131 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:32:04Z\",\n \"avg_ns\": 37640495284,\n \"stddev_ns\": 48200379,\n \"avg_ts\": 13.602386,\n \"stddev_ts\": 0.017406,\n \"samples_ns\": [ 37618268914, 37695798178, 37607418761 ],\n \"samples_ts\": [ 13.6104, 13.5824, 13.6143 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:31:52Z", "avg_ns": 2791588048, "stddev_ns": 10173851, "avg_ts": 45.852443, "stddev_ts": 0.166815, "samples_ns": [ 2783790450, 2803096037, 2787877659 ], "samples_ts": [ 45.9805, 45.6638, 45.9131 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T03:32:04Z", "avg_ns": 37640495284, "stddev_ns": 48200379, "avg_ts": 13.602386, "stddev_ts": 0.017406, "samples_ns": [ 37618268914, 37695798178, 37607418761 ], "samples_ts": [ 13.6104, 13.5824, 13.6143 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 333 }, { "timestamp_utc": "2025-12-09T03:35:11.577928+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:33:57Z\",\n \"avg_ns\": 11395636973,\n \"stddev_ns\": 9486622,\n \"avg_ts\": 44.929497,\n \"stddev_ts\": 0.037385,\n \"samples_ns\": [ 11392794173, 11406218728, 11387898020 ],\n \"samples_ts\": [ 44.9407, 44.8878, 44.96 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:34:43Z\",\n \"avg_ns\": 9336588621,\n \"stddev_ns\": 28051850,\n \"avg_ts\": 13.709587,\n \"stddev_ts\": 0.041169,\n \"samples_ns\": [ 9310283188, 9333373225, 9366109451 ],\n \"samples_ts\": [ 13.7482, 13.7142, 13.6663 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:33:57Z", "avg_ns": 11395636973, "stddev_ns": 9486622, "avg_ts": 44.929497, "stddev_ts": 0.037385, "samples_ns": [ 11392794173, 11406218728, 11387898020 ], "samples_ts": [ 44.9407, 44.8878, 44.96 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T03:34:43Z", "avg_ns": 9336588621, "stddev_ns": 28051850, "avg_ts": 13.709587, "stddev_ts": 0.041169, "samples_ns": [ 9310283188, 9333373225, 9366109451 ], "samples_ts": [ 13.7482, 13.7142, 13.6663 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 334 }, { "timestamp_utc": "2025-12-09T03:37:50.954944+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:35:12Z\",\n \"avg_ns\": 11347554182,\n \"stddev_ns\": 18599874,\n \"avg_ts\": 45.119936,\n \"stddev_ts\": 0.073891,\n \"samples_ns\": [ 11339398767, 11334425470, 11368838310 ],\n \"samples_ts\": [ 45.1523, 45.1721, 45.0354 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:35:57Z\",\n \"avg_ns\": 37696618311,\n \"stddev_ns\": 62200381,\n \"avg_ts\": 13.582145,\n \"stddev_ts\": 0.022393,\n \"samples_ns\": [ 37767098007, 37673349641, 37649407286 ],\n \"samples_ts\": [ 13.5568, 13.5905, 13.5992 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:35:12Z", "avg_ns": 11347554182, "stddev_ns": 18599874, "avg_ts": 45.119936, "stddev_ts": 0.073891, "samples_ns": [ 11339398767, 11334425470, 11368838310 ], "samples_ts": [ 45.1523, 45.1721, 45.0354 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T03:35:57Z", "avg_ns": 37696618311, "stddev_ns": 62200381, "avg_ts": 13.582145, "stddev_ts": 0.022393, "samples_ns": [ 37767098007, 37673349641, 37649407286 ], "samples_ts": [ 13.5568, 13.5905, 13.5992 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 335 }, { "timestamp_utc": "2025-12-09T03:38:30.951548+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:37:51Z\",\n \"avg_ns\": 2804728981,\n \"stddev_ns\": 10198291,\n \"avg_ts\": 45.637610,\n \"stddev_ts\": 0.165691,\n \"samples_ns\": [ 2796481980, 2801572689, 2816132274 ],\n \"samples_ts\": [ 45.7718, 45.6886, 45.4524 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:38:02Z\",\n \"avg_ns\": 9321096820,\n \"stddev_ns\": 14093739,\n \"avg_ts\": 13.732310,\n \"stddev_ts\": 0.020776,\n \"samples_ns\": [ 9332357494, 9325640607, 9305292361 ],\n \"samples_ts\": [ 13.7157, 13.7256, 13.7556 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:37:51Z", "avg_ns": 2804728981, "stddev_ns": 10198291, "avg_ts": 45.63761, "stddev_ts": 0.165691, "samples_ns": [ 2796481980, 2801572689, 2816132274 ], "samples_ts": [ 45.7718, 45.6886, 45.4524 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T03:38:02Z", "avg_ns": 9321096820, "stddev_ns": 14093739, "avg_ts": 13.73231, "stddev_ts": 0.020776, "samples_ns": [ 9332357494, 9325640607, 9305292361 ], "samples_ts": [ 13.7157, 13.7256, 13.7556 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 336 }, { "timestamp_utc": "2025-12-09T03:40:36.316751+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:38:31Z\",\n \"avg_ns\": 2776953585,\n \"stddev_ns\": 15259272,\n \"avg_ts\": 46.094603,\n \"stddev_ts\": 0.253066,\n \"samples_ns\": [ 2762593612, 2775291287, 2792975857 ],\n \"samples_ts\": [ 46.3333, 46.1213, 45.8293 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:38:42Z\",\n \"avg_ns\": 37805231390,\n \"stddev_ns\": 137230120,\n \"avg_ts\": 13.543218,\n \"stddev_ts\": 0.049105,\n \"samples_ns\": [ 37774997188, 37685639450, 37955057532 ],\n \"samples_ts\": [ 13.5539, 13.5861, 13.4896 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:38:31Z", "avg_ns": 2776953585, "stddev_ns": 15259272, "avg_ts": 46.094603, "stddev_ts": 0.253066, "samples_ns": [ 2762593612, 2775291287, 2792975857 ], "samples_ts": [ 46.3333, 46.1213, 45.8293 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T03:38:42Z", "avg_ns": 37805231390, "stddev_ns": 137230120, "avg_ts": 13.543218, "stddev_ts": 0.049105, "samples_ns": [ 37774997188, 37685639450, 37955057532 ], "samples_ts": [ 13.5539, 13.5861, 13.4896 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 337 }, { "timestamp_utc": "2025-12-09T03:41:49.866354+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:40:36Z\",\n \"avg_ns\": 11186509804,\n \"stddev_ns\": 24323709,\n \"avg_ts\": 45.769559,\n \"stddev_ts\": 0.099613,\n \"samples_ns\": [ 11206044214, 11194219668, 11159265530 ],\n \"samples_ts\": [ 45.6896, 45.7379, 45.8812 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:41:21Z\",\n \"avg_ns\": 9309505210,\n \"stddev_ns\": 27013027,\n \"avg_ts\": 13.749465,\n \"stddev_ts\": 0.039941,\n \"samples_ns\": [ 9331818051, 9317224381, 9279473200 ],\n \"samples_ts\": [ 13.7165, 13.738, 13.7939 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:40:36Z", "avg_ns": 11186509804, "stddev_ns": 24323709, "avg_ts": 45.769559, "stddev_ts": 0.099613, "samples_ns": [ 11206044214, 11194219668, 11159265530 ], "samples_ts": [ 45.6896, 45.7379, 45.8812 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T03:41:21Z", "avg_ns": 9309505210, "stddev_ns": 27013027, "avg_ts": 13.749465, "stddev_ts": 0.039941, "samples_ns": [ 9331818051, 9317224381, 9279473200 ], "samples_ts": [ 13.7165, 13.738, 13.7939 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 338 }, { "timestamp_utc": "2025-12-09T03:44:28.274313+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:41:50Z\",\n \"avg_ns\": 11139019930,\n \"stddev_ns\": 25247857,\n \"avg_ts\": 45.964704,\n \"stddev_ts\": 0.104282,\n \"samples_ns\": [ 11159395208, 11146889647, 11110774937 ],\n \"samples_ts\": [ 45.8806, 45.9321, 46.0814 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:42:35Z\",\n \"avg_ns\": 37646079658,\n \"stddev_ns\": 42542773,\n \"avg_ts\": 13.600365,\n \"stddev_ts\": 0.015375,\n \"samples_ns\": [ 37656880386, 37682180597, 37599177992 ],\n \"samples_ts\": [ 13.5965, 13.5873, 13.6173 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:41:50Z", "avg_ns": 11139019930, "stddev_ns": 25247857, "avg_ts": 45.964704, "stddev_ts": 0.104282, "samples_ns": [ 11159395208, 11146889647, 11110774937 ], "samples_ts": [ 45.8806, 45.9321, 46.0814 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T03:42:35Z", "avg_ns": 37646079658, "stddev_ns": 42542773, "avg_ts": 13.600365, "stddev_ts": 0.015375, "samples_ns": [ 37656880386, 37682180597, 37599177992 ], "samples_ts": [ 13.5965, 13.5873, 13.6173 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 339 }, { "timestamp_utc": "2025-12-09T03:45:08.351784+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:44:28Z\",\n \"avg_ns\": 2806315962,\n \"stddev_ns\": 8380355,\n \"avg_ts\": 45.611672,\n \"stddev_ts\": 0.136442,\n \"samples_ns\": [ 2811434348, 2810868868, 2796644670 ],\n \"samples_ts\": [ 45.5284, 45.5375, 45.7691 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:44:40Z\",\n \"avg_ns\": 9348231872,\n \"stddev_ns\": 17626601,\n \"avg_ts\": 13.692461,\n \"stddev_ts\": 0.025803,\n \"samples_ns\": [ 9344455514, 9332799502, 9367440600 ],\n \"samples_ts\": [ 13.698, 13.7151, 13.6644 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:44:28Z", "avg_ns": 2806315962, "stddev_ns": 8380355, "avg_ts": 45.611672, "stddev_ts": 0.136442, "samples_ns": [ 2811434348, 2810868868, 2796644670 ], "samples_ts": [ 45.5284, 45.5375, 45.7691 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T03:44:40Z", "avg_ns": 9348231872, "stddev_ns": 17626601, "avg_ts": 13.692461, "stddev_ts": 0.025803, "samples_ns": [ 9344455514, 9332799502, 9367440600 ], "samples_ts": [ 13.698, 13.7151, 13.6644 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 340 }, { "timestamp_utc": "2025-12-09T03:47:13.251819+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:45:09Z\",\n \"avg_ns\": 2786632456,\n \"stddev_ns\": 13957042,\n \"avg_ts\": 45.934347,\n \"stddev_ts\": 0.230330,\n \"samples_ns\": [ 2771712071, 2799368331, 2788816967 ],\n \"samples_ts\": [ 46.1808, 45.7246, 45.8976 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:45:20Z\",\n \"avg_ns\": 37638807028,\n \"stddev_ns\": 50169782,\n \"avg_ts\": 13.602998,\n \"stddev_ts\": 0.018120,\n \"samples_ns\": [ 37620754396, 37600161718, 37695504970 ],\n \"samples_ts\": [ 13.6095, 13.617, 13.5825 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:45:09Z", "avg_ns": 2786632456, "stddev_ns": 13957042, "avg_ts": 45.934347, "stddev_ts": 0.23033, "samples_ns": [ 2771712071, 2799368331, 2788816967 ], "samples_ts": [ 46.1808, 45.7246, 45.8976 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T03:45:20Z", "avg_ns": 37638807028, "stddev_ns": 50169782, "avg_ts": 13.602998, "stddev_ts": 0.01812, "samples_ns": [ 37620754396, 37600161718, 37695504970 ], "samples_ts": [ 13.6095, 13.617, 13.5825 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 341 }, { "timestamp_utc": "2025-12-09T03:48:26.912696+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:47:13Z\",\n \"avg_ns\": 11220863168,\n \"stddev_ns\": 35372564,\n \"avg_ts\": 45.629590,\n \"stddev_ts\": 0.143585,\n \"samples_ns\": [ 11198313616, 11202644804, 11261631085 ],\n \"samples_ts\": [ 45.7212, 45.7035, 45.4641 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:47:58Z\",\n \"avg_ns\": 9315156611,\n \"stddev_ns\": 40311096,\n \"avg_ts\": 13.741218,\n \"stddev_ts\": 0.059321,\n \"samples_ns\": [ 9361535068, 9288538930, 9295395836 ],\n \"samples_ts\": [ 13.673, 13.7804, 13.7703 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:47:13Z", "avg_ns": 11220863168, "stddev_ns": 35372564, "avg_ts": 45.62959, "stddev_ts": 0.143585, "samples_ns": [ 11198313616, 11202644804, 11261631085 ], "samples_ts": [ 45.7212, 45.7035, 45.4641 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T03:47:58Z", "avg_ns": 9315156611, "stddev_ns": 40311096, "avg_ts": 13.741218, "stddev_ts": 0.059321, "samples_ns": [ 9361535068, 9288538930, 9295395836 ], "samples_ts": [ 13.673, 13.7804, 13.7703 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 342 }, { "timestamp_utc": "2025-12-09T03:51:05.848967+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:48:27Z\",\n \"avg_ns\": 11238956769,\n \"stddev_ns\": 32095364,\n \"avg_ts\": 45.556077,\n \"stddev_ts\": 0.130085,\n \"samples_ns\": [ 11207187813, 11271368876, 11238313618 ],\n \"samples_ts\": [ 45.685, 45.4248, 45.5584 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:49:12Z\",\n \"avg_ns\": 37720570679,\n \"stddev_ns\": 126449920,\n \"avg_ts\": 13.573597,\n \"stddev_ts\": 0.045513,\n \"samples_ns\": [ 37591261495, 37726497972, 37843952571 ],\n \"samples_ts\": [ 13.6202, 13.5714, 13.5292 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:48:27Z", "avg_ns": 11238956769, "stddev_ns": 32095364, "avg_ts": 45.556077, "stddev_ts": 0.130085, "samples_ns": [ 11207187813, 11271368876, 11238313618 ], "samples_ts": [ 45.685, 45.4248, 45.5584 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T03:49:12Z", "avg_ns": 37720570679, "stddev_ns": 126449920, "avg_ts": 13.573597, "stddev_ts": 0.045513, "samples_ns": [ 37591261495, 37726497972, 37843952571 ], "samples_ts": [ 13.6202, 13.5714, 13.5292 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 343 }, { "timestamp_utc": "2025-12-09T03:51:45.667251+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:51:06Z\",\n \"avg_ns\": 2782860760,\n \"stddev_ns\": 4787662,\n \"avg_ts\": 45.995924,\n \"stddev_ts\": 0.079191,\n \"samples_ns\": [ 2777415840, 2786408314, 2784758128 ],\n \"samples_ts\": [ 46.086, 45.9373, 45.9645 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:51:17Z\",\n \"avg_ns\": 9288892983,\n \"stddev_ns\": 29478133,\n \"avg_ts\": 13.779991,\n \"stddev_ts\": 0.043729,\n \"samples_ns\": [ 9288654605, 9259535078, 9318489268 ],\n \"samples_ts\": [ 13.7803, 13.8236, 13.7361 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:51:06Z", "avg_ns": 2782860760, "stddev_ns": 4787662, "avg_ts": 45.995924, "stddev_ts": 0.079191, "samples_ns": [ 2777415840, 2786408314, 2784758128 ], "samples_ts": [ 46.086, 45.9373, 45.9645 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T03:51:17Z", "avg_ns": 9288892983, "stddev_ns": 29478133, "avg_ts": 13.779991, "stddev_ts": 0.043729, "samples_ns": [ 9288654605, 9259535078, 9318489268 ], "samples_ts": [ 13.7803, 13.8236, 13.7361 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 344 }, { "timestamp_utc": "2025-12-09T03:53:50.654685+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:51:46Z\",\n \"avg_ns\": 2796949233,\n \"stddev_ns\": 8633527,\n \"avg_ts\": 45.764439,\n \"stddev_ts\": 0.141324,\n \"samples_ns\": [ 2787949676, 2797735015, 2805163008 ],\n \"samples_ts\": [ 45.9119, 45.7513, 45.6301 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:51:57Z\",\n \"avg_ns\": 37655988530,\n \"stddev_ns\": 148470350,\n \"avg_ts\": 13.596916,\n \"stddev_ts\": 0.053490,\n \"samples_ns\": [ 37578317745, 37562465010, 37827182835 ],\n \"samples_ts\": [ 13.6249, 13.6306, 13.5352 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:51:46Z", "avg_ns": 2796949233, "stddev_ns": 8633527, "avg_ts": 45.764439, "stddev_ts": 0.141324, "samples_ns": [ 2787949676, 2797735015, 2805163008 ], "samples_ts": [ 45.9119, 45.7513, 45.6301 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T03:51:57Z", "avg_ns": 37655988530, "stddev_ns": 148470350, "avg_ts": 13.596916, "stddev_ts": 0.05349, "samples_ns": [ 37578317745, 37562465010, 37827182835 ], "samples_ts": [ 13.6249, 13.6306, 13.5352 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 345 }, { "timestamp_utc": "2025-12-09T03:55:04.988983+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:53:51Z\",\n \"avg_ns\": 11351756344,\n \"stddev_ns\": 14319520,\n \"avg_ts\": 45.103201,\n \"stddev_ts\": 0.056856,\n \"samples_ns\": [ 11368126242, 11341557157, 11345585634 ],\n \"samples_ts\": [ 45.0382, 45.1437, 45.1277 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:54:36Z\",\n \"avg_ns\": 9352221114,\n \"stddev_ns\": 66148412,\n \"avg_ts\": 13.687046,\n \"stddev_ts\": 0.097147,\n \"samples_ns\": [ 9277126426, 9377677776, 9401859142 ],\n \"samples_ts\": [ 13.7974, 13.6494, 13.6143 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:53:51Z", "avg_ns": 11351756344, "stddev_ns": 14319520, "avg_ts": 45.103201, "stddev_ts": 0.056856, "samples_ns": [ 11368126242, 11341557157, 11345585634 ], "samples_ts": [ 45.0382, 45.1437, 45.1277 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T03:54:36Z", "avg_ns": 9352221114, "stddev_ns": 66148412, "avg_ts": 13.687046, "stddev_ts": 0.097147, "samples_ns": [ 9277126426, 9377677776, 9401859142 ], "samples_ts": [ 13.7974, 13.6494, 13.6143 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 346 }, { "timestamp_utc": "2025-12-09T03:57:44.237924+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:55:05Z\",\n \"avg_ns\": 11370694207,\n \"stddev_ns\": 20750710,\n \"avg_ts\": 45.028134,\n \"stddev_ts\": 0.082202,\n \"samples_ns\": [ 11373592099, 11389843100, 11348647424 ],\n \"samples_ts\": [ 45.0166, 44.9523, 45.1155 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:55:51Z\",\n \"avg_ns\": 37623689643,\n \"stddev_ns\": 12656249,\n \"avg_ts\": 13.608448,\n \"stddev_ts\": 0.004577,\n \"samples_ns\": [ 37613152776, 37637727995, 37620188158 ],\n \"samples_ts\": [ 13.6123, 13.6034, 13.6097 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:55:05Z", "avg_ns": 11370694207, "stddev_ns": 20750710, "avg_ts": 45.028134, "stddev_ts": 0.082202, "samples_ns": [ 11373592099, 11389843100, 11348647424 ], "samples_ts": [ 45.0166, 44.9523, 45.1155 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T03:55:51Z", "avg_ns": 37623689643, "stddev_ns": 12656249, "avg_ts": 13.608448, "stddev_ts": 0.004577, "samples_ns": [ 37613152776, 37637727995, 37620188158 ], "samples_ts": [ 13.6123, 13.6034, 13.6097 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 347 }, { "timestamp_utc": "2025-12-09T03:58:25.386828+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:57:44Z\",\n \"avg_ns\": 2788157990,\n \"stddev_ns\": 10999131,\n \"avg_ts\": 45.908921,\n \"stddev_ts\": 0.180816,\n \"samples_ns\": [ 2779147550, 2800414616, 2784911806 ],\n \"samples_ts\": [ 46.0573, 45.7075, 45.962 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:57:56Z\",\n \"avg_ns\": 9713854758,\n \"stddev_ns\": 36149058,\n \"avg_ts\": 13.177177,\n \"stddev_ts\": 0.049129,\n \"samples_ns\": [ 9740594525, 9672727293, 9728242457 ],\n \"samples_ts\": [ 13.1409, 13.2331, 13.1576 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:57:44Z", "avg_ns": 2788157990, "stddev_ns": 10999131, "avg_ts": 45.908921, "stddev_ts": 0.180816, "samples_ns": [ 2779147550, 2800414616, 2784911806 ], "samples_ts": [ 46.0573, 45.7075, 45.962 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T03:57:56Z", "avg_ns": 9713854758, "stddev_ns": 36149058, "avg_ts": 13.177177, "stddev_ts": 0.049129, "samples_ns": [ 9740594525, 9672727293, 9728242457 ], "samples_ts": [ 13.1409, 13.2331, 13.1576 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 348 }, { "timestamp_utc": "2025-12-09T04:00:37.326183+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:58:26Z\",\n \"avg_ns\": 2854568308,\n \"stddev_ns\": 76421137,\n \"avg_ts\": 44.861529,\n \"stddev_ts\": 1.183562,\n \"samples_ns\": [ 2803079849, 2818248955, 2942376122 ],\n \"samples_ts\": [ 45.6641, 45.4183, 43.5023 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:58:37Z\",\n \"avg_ns\": 39909928769,\n \"stddev_ns\": 1168043127,\n \"avg_ts\": 12.836098,\n \"stddev_ts\": 0.369538,\n \"samples_ns\": [ 41256189290, 39166005112, 39307591905 ],\n \"samples_ts\": [ 12.4103, 13.0726, 13.0255 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T03:58:26Z", "avg_ns": 2854568308, "stddev_ns": 76421137, "avg_ts": 44.861529, "stddev_ts": 1.183562, "samples_ns": [ 2803079849, 2818248955, 2942376122 ], "samples_ts": [ 45.6641, 45.4183, 43.5023 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T03:58:37Z", "avg_ns": 39909928769, "stddev_ns": 1168043127, "avg_ts": 12.836098, "stddev_ts": 0.369538, "samples_ns": [ 41256189290, 39166005112, 39307591905 ], "samples_ts": [ 12.4103, 13.0726, 13.0255 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 349 }, { "timestamp_utc": "2025-12-09T04:01:52.036689+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:00:37Z\",\n \"avg_ns\": 11277388922,\n \"stddev_ns\": 192486622,\n \"avg_ts\": 45.409314,\n \"stddev_ts\": 0.767583,\n \"samples_ns\": [ 11175863765, 11499384005, 11156918997 ],\n \"samples_ts\": [ 45.813, 44.5241, 45.8908 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:01:23Z\",\n \"avg_ns\": 9622818410,\n \"stddev_ns\": 28149953,\n \"avg_ts\": 13.301792,\n \"stddev_ts\": 0.038888,\n \"samples_ns\": [ 9618752791, 9596922502, 9652779938 ],\n \"samples_ts\": [ 13.3073, 13.3376, 13.2604 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:00:37Z", "avg_ns": 11277388922, "stddev_ns": 192486622, "avg_ts": 45.409314, "stddev_ts": 0.767583, "samples_ns": [ 11175863765, 11499384005, 11156918997 ], "samples_ts": [ 45.813, 44.5241, 45.8908 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T04:01:23Z", "avg_ns": 9622818410, "stddev_ns": 28149953, "avg_ts": 13.301792, "stddev_ts": 0.038888, "samples_ns": [ 9618752791, 9596922502, 9652779938 ], "samples_ts": [ 13.3073, 13.3376, 13.2604 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 350 }, { "timestamp_utc": "2025-12-09T04:04:30.754010+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:01:52Z\",\n \"avg_ns\": 11206063171,\n \"stddev_ns\": 25674513,\n \"avg_ts\": 45.689711,\n \"stddev_ts\": 0.104627,\n \"samples_ns\": [ 11202199708, 11182539556, 11233450250 ],\n \"samples_ts\": [ 45.7053, 45.7857, 45.5782 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:02:37Z\",\n \"avg_ns\": 37668387010,\n \"stddev_ns\": 44287672,\n \"avg_ts\": 13.592312,\n \"stddev_ts\": 0.015972,\n \"samples_ns\": [ 37653256054, 37633648223, 37718256754 ],\n \"samples_ts\": [ 13.5978, 13.6048, 13.5743 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:01:52Z", "avg_ns": 11206063171, "stddev_ns": 25674513, "avg_ts": 45.689711, "stddev_ts": 0.104627, "samples_ns": [ 11202199708, 11182539556, 11233450250 ], "samples_ts": [ 45.7053, 45.7857, 45.5782 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T04:02:37Z", "avg_ns": 37668387010, "stddev_ns": 44287672, "avg_ts": 13.592312, "stddev_ts": 0.015972, "samples_ns": [ 37653256054, 37633648223, 37718256754 ], "samples_ts": [ 13.5978, 13.6048, 13.5743 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 351 }, { "timestamp_utc": "2025-12-09T04:05:11.852412+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:04:31Z\",\n \"avg_ns\": 2780450151,\n \"stddev_ns\": 11516597,\n \"avg_ts\": 46.036238,\n \"stddev_ts\": 0.190874,\n \"samples_ns\": [ 2790893499, 2768098653, 2782358301 ],\n \"samples_ts\": [ 45.8634, 46.2411, 46.0041 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:04:42Z\",\n \"avg_ns\": 9711720792,\n \"stddev_ns\": 18724095,\n \"avg_ts\": 13.179983,\n \"stddev_ts\": 0.025384,\n \"samples_ns\": [ 9698703765, 9703279722, 9733178891 ],\n \"samples_ts\": [ 13.1976, 13.1914, 13.1509 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:04:31Z", "avg_ns": 2780450151, "stddev_ns": 11516597, "avg_ts": 46.036238, "stddev_ts": 0.190874, "samples_ns": [ 2790893499, 2768098653, 2782358301 ], "samples_ts": [ 45.8634, 46.2411, 46.0041 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T04:04:42Z", "avg_ns": 9711720792, "stddev_ns": 18724095, "avg_ts": 13.179983, "stddev_ts": 0.025384, "samples_ns": [ 9698703765, 9703279722, 9733178891 ], "samples_ts": [ 13.1976, 13.1914, 13.1509 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 352 }, { "timestamp_utc": "2025-12-09T04:07:16.881570+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:05:12Z\",\n \"avg_ns\": 2802724955,\n \"stddev_ns\": 5604643,\n \"avg_ts\": 45.669962,\n \"stddev_ts\": 0.091305,\n \"samples_ns\": [ 2808545252, 2797364269, 2802265344 ],\n \"samples_ts\": [ 45.5752, 45.7574, 45.6773 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:05:23Z\",\n \"avg_ns\": 37651131292,\n \"stddev_ns\": 63514966,\n \"avg_ts\": 13.598555,\n \"stddev_ts\": 0.022946,\n \"samples_ns\": [ 37711168915, 37657591086, 37584633877 ],\n \"samples_ts\": [ 13.5769, 13.5962, 13.6226 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:05:12Z", "avg_ns": 2802724955, "stddev_ns": 5604643, "avg_ts": 45.669962, "stddev_ts": 0.091305, "samples_ns": [ 2808545252, 2797364269, 2802265344 ], "samples_ts": [ 45.5752, 45.7574, 45.6773 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T04:05:23Z", "avg_ns": 37651131292, "stddev_ns": 63514966, "avg_ts": 13.598555, "stddev_ts": 0.022946, "samples_ns": [ 37711168915, 37657591086, 37584633877 ], "samples_ts": [ 13.5769, 13.5962, 13.6226 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 353 }, { "timestamp_utc": "2025-12-09T04:08:30.619629+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:07:17Z\",\n \"avg_ns\": 11202107563,\n \"stddev_ns\": 21990956,\n \"avg_ts\": 45.705802,\n \"stddev_ts\": 0.089745,\n \"samples_ns\": [ 11203798748, 11179320096, 11223203846 ],\n \"samples_ts\": [ 45.6988, 45.7988, 45.6198 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:08:02Z\",\n \"avg_ns\": 9363667767,\n \"stddev_ns\": 33840271,\n \"avg_ts\": 13.669976,\n \"stddev_ts\": 0.049326,\n \"samples_ns\": [ 9352849116, 9401594353, 9336559833 ],\n \"samples_ts\": [ 13.6857, 13.6147, 13.7095 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:07:17Z", "avg_ns": 11202107563, "stddev_ns": 21990956, "avg_ts": 45.705802, "stddev_ts": 0.089745, "samples_ns": [ 11203798748, 11179320096, 11223203846 ], "samples_ts": [ 45.6988, 45.7988, 45.6198 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T04:08:02Z", "avg_ns": 9363667767, "stddev_ns": 33840271, "avg_ts": 13.669976, "stddev_ts": 0.049326, "samples_ns": [ 9352849116, 9401594353, 9336559833 ], "samples_ts": [ 13.6857, 13.6147, 13.7095 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 354 }, { "timestamp_utc": "2025-12-09T04:11:09.913788+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:08:31Z\",\n \"avg_ns\": 11149158239,\n \"stddev_ns\": 13689142,\n \"avg_ts\": 45.922796,\n \"stddev_ts\": 0.056344,\n \"samples_ns\": [ 11140658186, 11164949206, 11141867326 ],\n \"samples_ts\": [ 45.9578, 45.8578, 45.9528 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:09:15Z\",\n \"avg_ns\": 37932949685,\n \"stddev_ns\": 117859055,\n \"avg_ts\": 13.497587,\n \"stddev_ts\": 0.041954,\n \"samples_ns\": [ 37810391850, 38045467649, 37942989556 ],\n \"samples_ts\": [ 13.5413, 13.4576, 13.4939 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:08:31Z", "avg_ns": 11149158239, "stddev_ns": 13689142, "avg_ts": 45.922796, "stddev_ts": 0.056344, "samples_ns": [ 11140658186, 11164949206, 11141867326 ], "samples_ts": [ 45.9578, 45.8578, 45.9528 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T04:09:15Z", "avg_ns": 37932949685, "stddev_ns": 117859055, "avg_ts": 13.497587, "stddev_ts": 0.041954, "samples_ns": [ 37810391850, 38045467649, 37942989556 ], "samples_ts": [ 13.5413, 13.4576, 13.4939 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 355 }, { "timestamp_utc": "2025-12-09T04:11:51.129687+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:11:10Z\",\n \"avg_ns\": 2795437242,\n \"stddev_ns\": 11044711,\n \"avg_ts\": 45.789378,\n \"stddev_ts\": 0.180914,\n \"samples_ns\": [ 2784381181, 2806470316, 2795460230 ],\n \"samples_ts\": [ 45.9707, 45.6089, 45.7885 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:11:21Z\",\n \"avg_ns\": 9732318384,\n \"stddev_ns\": 26720494,\n \"avg_ts\": 13.152122,\n \"stddev_ts\": 0.036058,\n \"samples_ns\": [ 9713158562, 9720954066, 9762842524 ],\n \"samples_ts\": [ 13.178, 13.1674, 13.1109 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:11:10Z", "avg_ns": 2795437242, "stddev_ns": 11044711, "avg_ts": 45.789378, "stddev_ts": 0.180914, "samples_ns": [ 2784381181, 2806470316, 2795460230 ], "samples_ts": [ 45.9707, 45.6089, 45.7885 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T04:11:21Z", "avg_ns": 9732318384, "stddev_ns": 26720494, "avg_ts": 13.152122, "stddev_ts": 0.036058, "samples_ns": [ 9713158562, 9720954066, 9762842524 ], "samples_ts": [ 13.178, 13.1674, 13.1109 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 356 }, { "timestamp_utc": "2025-12-09T04:14:00.254563+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:11:51Z\",\n \"avg_ns\": 2782211199,\n \"stddev_ns\": 15364924,\n \"avg_ts\": 46.007505,\n \"stddev_ts\": 0.253385,\n \"samples_ns\": [ 2799667764, 2776226890, 2770738944 ],\n \"samples_ts\": [ 45.7197, 46.1057, 46.1971 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:12:02Z\",\n \"avg_ns\": 39049741785,\n \"stddev_ns\": 102846451,\n \"avg_ts\": 13.111543,\n \"stddev_ts\": 0.034524,\n \"samples_ns\": [ 38950246092, 39155639163, 39043340102 ],\n \"samples_ts\": [ 13.145, 13.076, 13.1136 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:11:51Z", "avg_ns": 2782211199, "stddev_ns": 15364924, "avg_ts": 46.007505, "stddev_ts": 0.253385, "samples_ns": [ 2799667764, 2776226890, 2770738944 ], "samples_ts": [ 45.7197, 46.1057, 46.1971 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T04:12:02Z", "avg_ns": 39049741785, "stddev_ns": 102846451, "avg_ts": 13.111543, "stddev_ts": 0.034524, "samples_ns": [ 38950246092, 39155639163, 39043340102 ], "samples_ts": [ 13.145, 13.076, 13.1136 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 357 }, { "timestamp_utc": "2025-12-09T04:15:14.500034+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:14:00Z\",\n \"avg_ns\": 11335750956,\n \"stddev_ns\": 11106854,\n \"avg_ts\": 45.166865,\n \"stddev_ts\": 0.044254,\n \"samples_ns\": [ 11346569431, 11336304940, 11324378499 ],\n \"samples_ts\": [ 45.1238, 45.1646, 45.2122 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:14:46Z\",\n \"avg_ns\": 9343551297,\n \"stddev_ns\": 62742953,\n \"avg_ts\": 13.699699,\n \"stddev_ts\": 0.091833,\n \"samples_ns\": [ 9332063733, 9411244218, 9287345941 ],\n \"samples_ts\": [ 13.7162, 13.6008, 13.7822 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:14:00Z", "avg_ns": 11335750956, "stddev_ns": 11106854, "avg_ts": 45.166865, "stddev_ts": 0.044254, "samples_ns": [ 11346569431, 11336304940, 11324378499 ], "samples_ts": [ 45.1238, 45.1646, 45.2122 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T04:14:46Z", "avg_ns": 9343551297, "stddev_ns": 62742953, "avg_ts": 13.699699, "stddev_ts": 0.091833, "samples_ns": [ 9332063733, 9411244218, 9287345941 ], "samples_ts": [ 13.7162, 13.6008, 13.7822 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 358 }, { "timestamp_utc": "2025-12-09T04:17:54.172075+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:15:15Z\",\n \"avg_ns\": 11390388225,\n \"stddev_ns\": 12366140,\n \"avg_ts\": 44.950215,\n \"stddev_ts\": 0.048781,\n \"samples_ns\": [ 11387385452, 11379800462, 11403978762 ],\n \"samples_ts\": [ 44.962, 44.992, 44.8966 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:16:00Z\",\n \"avg_ns\": 37747980697,\n \"stddev_ns\": 6311260,\n \"avg_ts\": 13.563640,\n \"stddev_ts\": 0.002268,\n \"samples_ns\": [ 37755148514, 37745536441, 37743257136 ],\n \"samples_ts\": [ 13.5611, 13.5645, 13.5653 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:15:15Z", "avg_ns": 11390388225, "stddev_ns": 12366140, "avg_ts": 44.950215, "stddev_ts": 0.048781, "samples_ns": [ 11387385452, 11379800462, 11403978762 ], "samples_ts": [ 44.962, 44.992, 44.8966 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_type": "gemma3 1B Q8_0", "model_size": 1062773248, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T04:16:00Z", "avg_ns": 37747980697, "stddev_ns": 6311260, "avg_ts": 13.56364, "stddev_ts": 0.002268, "samples_ns": [ 37755148514, 37745536441, 37743257136 ], "samples_ts": [ 13.5611, 13.5645, 13.5653 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 359 }, { "timestamp_utc": "2025-12-09T04:19:03.701485+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:17:59Z\",\n \"avg_ns\": 5620038820,\n \"stddev_ns\": 7053294,\n \"avg_ts\": 22.775667,\n \"stddev_ts\": 0.028597,\n \"samples_ns\": [ 5622213355, 5612154723, 5625748383 ],\n \"samples_ts\": [ 22.7668, 22.8076, 22.7525 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:18:21Z\",\n \"avg_ns\": 13883052206,\n \"stddev_ns\": 48763607,\n \"avg_ts\": 9.219951,\n \"stddev_ts\": 0.032433,\n \"samples_ns\": [ 13898221417, 13922428042, 13828507161 ],\n \"samples_ts\": [ 9.20981, 9.1938, 9.25624 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:17:59Z", "avg_ns": 5620038820, "stddev_ns": 7053294, "avg_ts": 22.775667, "stddev_ts": 0.028597, "samples_ns": [ 5622213355, 5612154723, 5625748383 ], "samples_ts": [ 22.7668, 22.8076, 22.7525 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T04:18:21Z", "avg_ns": 13883052206, "stddev_ns": 48763607, "avg_ts": 9.219951, "stddev_ts": 0.032433, "samples_ns": [ 13898221417, 13922428042, 13828507161 ], "samples_ts": [ 9.20981, 9.1938, 9.25624 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 360 }, { "timestamp_utc": "2025-12-09T04:22:10.719201+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:19:04Z\",\n \"avg_ns\": 5612960025,\n \"stddev_ns\": 7023977,\n \"avg_ts\": 22.804391,\n \"stddev_ts\": 0.028529,\n \"samples_ns\": [ 5620481007, 5606570394, 5611828674 ],\n \"samples_ts\": [ 22.7739, 22.8304, 22.809 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:19:27Z\",\n \"avg_ns\": 54476033950,\n \"stddev_ns\": 134478161,\n \"avg_ts\": 9.398667,\n \"stddev_ts\": 0.023206,\n \"samples_ns\": [ 54337816040, 54483854790, 54606431020 ],\n \"samples_ts\": [ 9.42254, 9.39728, 9.37619 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:19:04Z", "avg_ns": 5612960025, "stddev_ns": 7023977, "avg_ts": 22.804391, "stddev_ts": 0.028529, "samples_ns": [ 5620481007, 5606570394, 5611828674 ], "samples_ts": [ 22.7739, 22.8304, 22.809 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T04:19:27Z", "avg_ns": 54476033950, "stddev_ns": 134478161, "avg_ts": 9.398667, "stddev_ts": 0.023206, "samples_ns": [ 54337816040, 54483854790, 54606431020 ], "samples_ts": [ 9.42254, 9.39728, 9.37619 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 361 }, { "timestamp_utc": "2025-12-09T04:24:22.584061+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:22:11Z\",\n \"avg_ns\": 22484501764,\n \"stddev_ns\": 19893499,\n \"avg_ts\": 22.771253,\n \"stddev_ts\": 0.020152,\n \"samples_ns\": [ 22462983936, 22502224056, 22488297300 ],\n \"samples_ts\": [ 22.7931, 22.7533, 22.7674 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:23:41Z\",\n \"avg_ns\": 13618249198,\n \"stddev_ns\": 41051613,\n \"avg_ts\": 9.399209,\n \"stddev_ts\": 0.028366,\n \"samples_ns\": [ 13572732440, 13629545630, 13652469526 ],\n \"samples_ts\": [ 9.43067, 9.39136, 9.37559 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:22:11Z", "avg_ns": 22484501764, "stddev_ns": 19893499, "avg_ts": 22.771253, "stddev_ts": 0.020152, "samples_ns": [ 22462983936, 22502224056, 22488297300 ], "samples_ts": [ 22.7931, 22.7533, 22.7674 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T04:23:41Z", "avg_ns": 13618249198, "stddev_ns": 41051613, "avg_ts": 9.399209, "stddev_ts": 0.028366, "samples_ns": [ 13572732440, 13629545630, 13652469526 ], "samples_ts": [ 9.43067, 9.39136, 9.37559 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 362 }, { "timestamp_utc": "2025-12-09T04:28:36.947715+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:24:23Z\",\n \"avg_ns\": 22579471804,\n \"stddev_ns\": 10451059,\n \"avg_ts\": 22.675467,\n \"stddev_ts\": 0.010496,\n \"samples_ns\": [ 22567693838, 22583093827, 22587627749 ],\n \"samples_ts\": [ 22.6873, 22.6718, 22.6673 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:25:53Z\",\n \"avg_ns\": 54327406817,\n \"stddev_ns\": 107354468,\n \"avg_ts\": 9.424365,\n \"stddev_ts\": 0.018603,\n \"samples_ns\": [ 54450436814, 54279031169, 54252752469 ],\n \"samples_ts\": [ 9.40305, 9.43274, 9.43731 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:24:23Z", "avg_ns": 22579471804, "stddev_ns": 10451059, "avg_ts": 22.675467, "stddev_ts": 0.010496, "samples_ns": [ 22567693838, 22583093827, 22587627749 ], "samples_ts": [ 22.6873, 22.6718, 22.6673 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T04:25:53Z", "avg_ns": 54327406817, "stddev_ns": 107354468, "avg_ts": 9.424365, "stddev_ts": 0.018603, "samples_ns": [ 54450436814, 54279031169, 54252752469 ], "samples_ts": [ 9.40305, 9.43274, 9.43731 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 363 }, { "timestamp_utc": "2025-12-09T04:29:42.391174+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:28:37Z\",\n \"avg_ns\": 5612398317,\n \"stddev_ns\": 3376744,\n \"avg_ts\": 22.806655,\n \"stddev_ts\": 0.013726,\n \"samples_ns\": [ 5613937767, 5608526178, 5614731006 ],\n \"samples_ts\": [ 22.8004, 22.8224, 22.7972 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:29:00Z\",\n \"avg_ns\": 13965044801,\n \"stddev_ns\": 12348214,\n \"avg_ts\": 9.165747,\n \"stddev_ts\": 0.008101,\n \"samples_ns\": [ 13979106491, 13960058474, 13955969438 ],\n \"samples_ts\": [ 9.15652, 9.16902, 9.1717 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:28:37Z", "avg_ns": 5612398317, "stddev_ns": 3376744, "avg_ts": 22.806655, "stddev_ts": 0.013726, "samples_ns": [ 5613937767, 5608526178, 5614731006 ], "samples_ts": [ 22.8004, 22.8224, 22.7972 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T04:29:00Z", "avg_ns": 13965044801, "stddev_ns": 12348214, "avg_ts": 9.165747, "stddev_ts": 0.008101, "samples_ns": [ 13979106491, 13960058474, 13955969438 ], "samples_ts": [ 9.15652, 9.16902, 9.1717 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 364 }, { "timestamp_utc": "2025-12-09T04:32:49.353814+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:29:43Z\",\n \"avg_ns\": 5615878878,\n \"stddev_ns\": 9838762,\n \"avg_ts\": 22.792561,\n \"stddev_ts\": 0.039964,\n \"samples_ns\": [ 5604790110, 5623563510, 5619283014 ],\n \"samples_ts\": [ 22.8376, 22.7614, 22.7787 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:30:05Z\",\n \"avg_ns\": 54473749387,\n \"stddev_ns\": 126985701,\n \"avg_ts\": 9.399057,\n \"stddev_ts\": 0.021910,\n \"samples_ns\": [ 54601219102, 54472775762, 54347253297 ],\n \"samples_ts\": [ 9.37708, 9.39919, 9.4209 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:29:43Z", "avg_ns": 5615878878, "stddev_ns": 9838762, "avg_ts": 22.792561, "stddev_ts": 0.039964, "samples_ns": [ 5604790110, 5623563510, 5619283014 ], "samples_ts": [ 22.8376, 22.7614, 22.7787 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T04:30:05Z", "avg_ns": 54473749387, "stddev_ns": 126985701, "avg_ts": 9.399057, "stddev_ts": 0.02191, "samples_ns": [ 54601219102, 54472775762, 54347253297 ], "samples_ts": [ 9.37708, 9.39919, 9.4209 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 365 }, { "timestamp_utc": "2025-12-09T04:35:01.461468+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:32:50Z\",\n \"avg_ns\": 22573556736,\n \"stddev_ns\": 28053737,\n \"avg_ts\": 22.681429,\n \"stddev_ts\": 0.028191,\n \"samples_ns\": [ 22544768397, 22600813092, 22575088719 ],\n \"samples_ts\": [ 22.7104, 22.6541, 22.6799 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:34:20Z\",\n \"avg_ns\": 13579916807,\n \"stddev_ns\": 36104321,\n \"avg_ts\": 9.425728,\n \"stddev_ts\": 0.025026,\n \"samples_ns\": [ 13565650054, 13620974412, 13553125956 ],\n \"samples_ts\": [ 9.4356, 9.39727, 9.44432 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:32:50Z", "avg_ns": 22573556736, "stddev_ns": 28053737, "avg_ts": 22.681429, "stddev_ts": 0.028191, "samples_ns": [ 22544768397, 22600813092, 22575088719 ], "samples_ts": [ 22.7104, 22.6541, 22.6799 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T04:34:20Z", "avg_ns": 13579916807, "stddev_ns": 36104321, "avg_ts": 9.425728, "stddev_ts": 0.025026, "samples_ns": [ 13565650054, 13620974412, 13553125956 ], "samples_ts": [ 9.4356, 9.39727, 9.44432 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 366 }, { "timestamp_utc": "2025-12-09T04:39:15.839018+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:35:02Z\",\n \"avg_ns\": 22587961406,\n \"stddev_ns\": 13590545,\n \"avg_ts\": 22.666947,\n \"stddev_ts\": 0.013637,\n \"samples_ns\": [ 22588153680, 22601453133, 22574277407 ],\n \"samples_ts\": [ 22.6667, 22.6534, 22.6807 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:36:32Z\",\n \"avg_ns\": 54298618190,\n \"stddev_ns\": 126341868,\n \"avg_ts\": 9.429372,\n \"stddev_ts\": 0.021924,\n \"samples_ns\": [ 54270173440, 54436757196, 54188923936 ],\n \"samples_ts\": [ 9.43428, 9.40541, 9.44843 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:35:02Z", "avg_ns": 22587961406, "stddev_ns": 13590545, "avg_ts": 22.666947, "stddev_ts": 0.013637, "samples_ns": [ 22588153680, 22601453133, 22574277407 ], "samples_ts": [ 22.6667, 22.6534, 22.6807 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T04:36:32Z", "avg_ns": 54298618190, "stddev_ns": 126341868, "avg_ts": 9.429372, "stddev_ts": 0.021924, "samples_ns": [ 54270173440, 54436757196, 54188923936 ], "samples_ts": [ 9.43428, 9.40541, 9.44843 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 367 }, { "timestamp_utc": "2025-12-09T04:40:21.207996+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:39:16Z\",\n \"avg_ns\": 5604800628,\n \"stddev_ns\": 6029171,\n \"avg_ts\": 22.837583,\n \"stddev_ts\": 0.024574,\n \"samples_ns\": [ 5609633304, 5606722912, 5598045670 ],\n \"samples_ts\": [ 22.8179, 22.8297, 22.8651 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:39:39Z\",\n \"avg_ns\": 13963123462,\n \"stddev_ns\": 21187047,\n \"avg_ts\": 9.167017,\n \"stddev_ts\": 0.013903,\n \"samples_ns\": [ 13959101322, 13986032615, 13944236451 ],\n \"samples_ts\": [ 9.16964, 9.15199, 9.17942 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:39:16Z", "avg_ns": 5604800628, "stddev_ns": 6029171, "avg_ts": 22.837583, "stddev_ts": 0.024574, "samples_ns": [ 5609633304, 5606722912, 5598045670 ], "samples_ts": [ 22.8179, 22.8297, 22.8651 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T04:39:39Z", "avg_ns": 13963123462, "stddev_ns": 21187047, "avg_ts": 9.167017, "stddev_ts": 0.013903, "samples_ns": [ 13959101322, 13986032615, 13944236451 ], "samples_ts": [ 9.16964, 9.15199, 9.17942 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 368 }, { "timestamp_utc": "2025-12-09T04:43:28.932900+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:40:22Z\",\n \"avg_ns\": 5699824163,\n \"stddev_ns\": 12953737,\n \"avg_ts\": 22.456910,\n \"stddev_ts\": 0.051045,\n \"samples_ns\": [ 5686521279, 5712396995, 5700554217 ],\n \"samples_ts\": [ 22.5094, 22.4074, 22.454 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:40:44Z\",\n \"avg_ns\": 54615790298,\n \"stddev_ns\": 83163750,\n \"avg_ts\": 9.374593,\n \"stddev_ts\": 0.014286,\n \"samples_ns\": [ 54521181744, 54677342951, 54648846201 ],\n \"samples_ts\": [ 9.39085, 9.36402, 9.36891 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:40:22Z", "avg_ns": 5699824163, "stddev_ns": 12953737, "avg_ts": 22.45691, "stddev_ts": 0.051045, "samples_ns": [ 5686521279, 5712396995, 5700554217 ], "samples_ts": [ 22.5094, 22.4074, 22.454 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T04:40:44Z", "avg_ns": 54615790298, "stddev_ns": 83163750, "avg_ts": 9.374593, "stddev_ts": 0.014286, "samples_ns": [ 54521181744, 54677342951, 54648846201 ], "samples_ts": [ 9.39085, 9.36402, 9.36891 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 369 }, { "timestamp_utc": "2025-12-09T04:45:42.371944+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:43:29Z\",\n \"avg_ns\": 23035532254,\n \"stddev_ns\": 44057935,\n \"avg_ts\": 22.226586,\n \"stddev_ts\": 0.042514,\n \"samples_ns\": [ 23036712454, 22990896338, 23078987971 ],\n \"samples_ts\": [ 22.2254, 22.2697, 22.1847 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:45:02Z\",\n \"avg_ns\": 13390334336,\n \"stddev_ns\": 19142006,\n \"avg_ts\": 9.559147,\n \"stddev_ts\": 0.013654,\n \"samples_ns\": [ 13376764616, 13382010039, 13412228355 ],\n \"samples_ts\": [ 9.56883, 9.56508, 9.54353 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:43:29Z", "avg_ns": 23035532254, "stddev_ns": 44057935, "avg_ts": 22.226586, "stddev_ts": 0.042514, "samples_ns": [ 23036712454, 22990896338, 23078987971 ], "samples_ts": [ 22.2254, 22.2697, 22.1847 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T04:45:02Z", "avg_ns": 13390334336, "stddev_ns": 19142006, "avg_ts": 9.559147, "stddev_ts": 0.013654, "samples_ns": [ 13376764616, 13382010039, 13412228355 ], "samples_ts": [ 9.56883, 9.56508, 9.54353 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 370 }, { "timestamp_utc": "2025-12-09T04:49:59.696424+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:45:43Z\",\n \"avg_ns\": 22859834330,\n \"stddev_ns\": 30936418,\n \"avg_ts\": 22.397390,\n \"stddev_ts\": 0.030296,\n \"samples_ns\": [ 22852151202, 22833463872, 22893887917 ],\n \"samples_ts\": [ 22.4049, 22.4232, 22.364 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:47:14Z\",\n \"avg_ns\": 54927665300,\n \"stddev_ns\": 51421587,\n \"avg_ts\": 9.321356,\n \"stddev_ts\": 0.008729,\n \"samples_ns\": [ 54942802295, 54870375346, 54969818261 ],\n \"samples_ts\": [ 9.31878, 9.33108, 9.3142 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:45:43Z", "avg_ns": 22859834330, "stddev_ns": 30936418, "avg_ts": 22.39739, "stddev_ts": 0.030296, "samples_ns": [ 22852151202, 22833463872, 22893887917 ], "samples_ts": [ 22.4049, 22.4232, 22.364 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T04:47:14Z", "avg_ns": 54927665300, "stddev_ns": 51421587, "avg_ts": 9.321356, "stddev_ts": 0.008729, "samples_ns": [ 54942802295, 54870375346, 54969818261 ], "samples_ts": [ 9.31878, 9.33108, 9.3142 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 371 }, { "timestamp_utc": "2025-12-09T04:51:04.854904+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:50:00Z\",\n \"avg_ns\": 5608969417,\n \"stddev_ns\": 18553394,\n \"avg_ts\": 22.820758,\n \"stddev_ts\": 0.075387,\n \"samples_ns\": [ 5593715494, 5603568837, 5629623920 ],\n \"samples_ts\": [ 22.8828, 22.8426, 22.7369 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:50:23Z\",\n \"avg_ns\": 13869689791,\n \"stddev_ns\": 49351106,\n \"avg_ts\": 9.228835,\n \"stddev_ts\": 0.032903,\n \"samples_ns\": [ 13812923071, 13893753598, 13902392706 ],\n \"samples_ts\": [ 9.26668, 9.21277, 9.20705 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:50:00Z", "avg_ns": 5608969417, "stddev_ns": 18553394, "avg_ts": 22.820758, "stddev_ts": 0.075387, "samples_ns": [ 5593715494, 5603568837, 5629623920 ], "samples_ts": [ 22.8828, 22.8426, 22.7369 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T04:50:23Z", "avg_ns": 13869689791, "stddev_ns": 49351106, "avg_ts": 9.228835, "stddev_ts": 0.032903, "samples_ns": [ 13812923071, 13893753598, 13902392706 ], "samples_ts": [ 9.26668, 9.21277, 9.20705 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 372 }, { "timestamp_utc": "2025-12-09T04:54:11.562257+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:51:05Z\",\n \"avg_ns\": 5637772986,\n \"stddev_ns\": 5560909,\n \"avg_ts\": 22.704015,\n \"stddev_ts\": 0.022380,\n \"samples_ns\": [ 5634660974, 5634465392, 5644192593 ],\n \"samples_ts\": [ 22.7165, 22.7173, 22.6782 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:51:28Z\",\n \"avg_ns\": 54347639018,\n \"stddev_ns\": 53579687,\n \"avg_ts\": 9.420839,\n \"stddev_ts\": 0.009287,\n \"samples_ns\": [ 54402293291, 54345419878, 54295203886 ],\n \"samples_ts\": [ 9.41137, 9.42122, 9.42993 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:51:05Z", "avg_ns": 5637772986, "stddev_ns": 5560909, "avg_ts": 22.704015, "stddev_ts": 0.02238, "samples_ns": [ 5634660974, 5634465392, 5644192593 ], "samples_ts": [ 22.7165, 22.7173, 22.6782 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T04:51:28Z", "avg_ns": 54347639018, "stddev_ns": 53579687, "avg_ts": 9.420839, "stddev_ts": 0.009287, "samples_ns": [ 54402293291, 54345419878, 54295203886 ], "samples_ts": [ 9.41137, 9.42122, 9.42993 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 373 }, { "timestamp_utc": "2025-12-09T04:56:24.583000+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:54:12Z\",\n \"avg_ns\": 22614577340,\n \"stddev_ns\": 27168416,\n \"avg_ts\": 22.640286,\n \"stddev_ts\": 0.027204,\n \"samples_ns\": [ 22617782751, 22640000024, 22585949247 ],\n \"samples_ts\": [ 22.6371, 22.6148, 22.669 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:55:42Z\",\n \"avg_ns\": 13824078683,\n \"stddev_ns\": 21769841,\n \"avg_ts\": 9.259222,\n \"stddev_ts\": 0.014583,\n \"samples_ns\": [ 13845163754, 13801684431, 13825387866 ],\n \"samples_ts\": [ 9.24511, 9.27423, 9.25833 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:54:12Z", "avg_ns": 22614577340, "stddev_ns": 27168416, "avg_ts": 22.640286, "stddev_ts": 0.027204, "samples_ns": [ 22617782751, 22640000024, 22585949247 ], "samples_ts": [ 22.6371, 22.6148, 22.669 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T04:55:42Z", "avg_ns": 13824078683, "stddev_ns": 21769841, "avg_ts": 9.259222, "stddev_ts": 0.014583, "samples_ns": [ 13845163754, 13801684431, 13825387866 ], "samples_ts": [ 9.24511, 9.27423, 9.25833 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 374 }, { "timestamp_utc": "2025-12-09T05:00:39.777249+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:56:25Z\",\n \"avg_ns\": 22824641041,\n \"stddev_ns\": 39733803,\n \"avg_ts\": 22.431942,\n \"stddev_ts\": 0.039013,\n \"samples_ns\": [ 22870163628, 22806829502, 22796929995 ],\n \"samples_ts\": [ 22.3872, 22.4494, 22.4592 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:57:56Z\",\n \"avg_ns\": 54241900217,\n \"stddev_ns\": 73306205,\n \"avg_ts\": 9.439209,\n \"stddev_ts\": 0.012750,\n \"samples_ns\": [ 54323350409, 54181222904, 54221127340 ],\n \"samples_ts\": [ 9.42504, 9.44977, 9.44281 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T04:56:25Z", "avg_ns": 22824641041, "stddev_ns": 39733803, "avg_ts": 22.431942, "stddev_ts": 0.039013, "samples_ns": [ 22870163628, 22806829502, 22796929995 ], "samples_ts": [ 22.3872, 22.4494, 22.4592 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T04:57:56Z", "avg_ns": 54241900217, "stddev_ns": 73306205, "avg_ts": 9.439209, "stddev_ts": 0.01275, "samples_ns": [ 54323350409, 54181222904, 54221127340 ], "samples_ts": [ 9.42504, 9.44977, 9.44281 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 375 }, { "timestamp_utc": "2025-12-09T05:01:44.616423+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:00:40Z\",\n \"avg_ns\": 5639868728,\n \"stddev_ns\": 5905466,\n \"avg_ts\": 22.695580,\n \"stddev_ts\": 0.023765,\n \"samples_ns\": [ 5645410664, 5640537015, 5633658507 ],\n \"samples_ts\": [ 22.6733, 22.6929, 22.7206 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:01:03Z\",\n \"avg_ns\": 13732436684,\n \"stddev_ns\": 21497734,\n \"avg_ts\": 9.321012,\n \"stddev_ts\": 0.014579,\n \"samples_ns\": [ 13717461782, 13722779477, 13757068795 ],\n \"samples_ts\": [ 9.33117, 9.32756, 9.30431 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:00:40Z", "avg_ns": 5639868728, "stddev_ns": 5905466, "avg_ts": 22.69558, "stddev_ts": 0.023765, "samples_ns": [ 5645410664, 5640537015, 5633658507 ], "samples_ts": [ 22.6733, 22.6929, 22.7206 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T05:01:03Z", "avg_ns": 13732436684, "stddev_ns": 21497734, "avg_ts": 9.321012, "stddev_ts": 0.014579, "samples_ns": [ 13717461782, 13722779477, 13757068795 ], "samples_ts": [ 9.33117, 9.32756, 9.30431 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 376 }, { "timestamp_utc": "2025-12-09T05:04:50.737790+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:01:45Z\",\n \"avg_ns\": 5628211366,\n \"stddev_ns\": 11015177,\n \"avg_ts\": 22.742630,\n \"stddev_ts\": 0.044557,\n \"samples_ns\": [ 5633274558, 5635784303, 5615575238 ],\n \"samples_ts\": [ 22.7221, 22.712, 22.7937 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:02:07Z\",\n \"avg_ns\": 54182194902,\n \"stddev_ns\": 58211715,\n \"avg_ts\": 9.449606,\n \"stddev_ts\": 0.010147,\n \"samples_ns\": [ 54137214565, 54247940966, 54161429177 ],\n \"samples_ts\": [ 9.45745, 9.43815, 9.45322 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:01:45Z", "avg_ns": 5628211366, "stddev_ns": 11015177, "avg_ts": 22.74263, "stddev_ts": 0.044557, "samples_ns": [ 5633274558, 5635784303, 5615575238 ], "samples_ts": [ 22.7221, 22.712, 22.7937 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T05:02:07Z", "avg_ns": 54182194902, "stddev_ns": 58211715, "avg_ts": 9.449606, "stddev_ts": 0.010147, "samples_ns": [ 54137214565, 54247940966, 54161429177 ], "samples_ts": [ 9.45745, 9.43815, 9.45322 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 377 }, { "timestamp_utc": "2025-12-09T05:07:03.130738+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:04:51Z\",\n \"avg_ns\": 22770685984,\n \"stddev_ns\": 33196879,\n \"avg_ts\": 22.485081,\n \"stddev_ts\": 0.032803,\n \"samples_ns\": [ 22733113990, 22782892333, 22796051629 ],\n \"samples_ts\": [ 22.5222, 22.473, 22.46 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:06:22Z\",\n \"avg_ns\": 13396621658,\n \"stddev_ns\": 44459078,\n \"avg_ts\": 9.554718,\n \"stddev_ts\": 0.031703,\n \"samples_ns\": [ 13394921128, 13441906303, 13353037545 ],\n \"samples_ts\": [ 9.55586, 9.52246, 9.58583 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:04:51Z", "avg_ns": 22770685984, "stddev_ns": 33196879, "avg_ts": 22.485081, "stddev_ts": 0.032803, "samples_ns": [ 22733113990, 22782892333, 22796051629 ], "samples_ts": [ 22.5222, 22.473, 22.46 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T05:06:22Z", "avg_ns": 13396621658, "stddev_ns": 44459078, "avg_ts": 9.554718, "stddev_ts": 0.031703, "samples_ns": [ 13394921128, 13441906303, 13353037545 ], "samples_ts": [ 9.55586, 9.52246, 9.58583 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 378 }, { "timestamp_utc": "2025-12-09T05:11:17.538056+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:07:04Z\",\n \"avg_ns\": 22675793368,\n \"stddev_ns\": 15101187,\n \"avg_ts\": 22.579151,\n \"stddev_ts\": 0.015033,\n \"samples_ns\": [ 22662849659, 22692384074, 22672146371 ],\n \"samples_ts\": [ 22.592, 22.5626, 22.5828 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:08:34Z\",\n \"avg_ns\": 54191370697,\n \"stddev_ns\": 51985005,\n \"avg_ts\": 9.448004,\n \"stddev_ts\": 0.009059,\n \"samples_ns\": [ 54250247580, 54151804310, 54172060201 ],\n \"samples_ts\": [ 9.43774, 9.4549, 9.45137 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:07:04Z", "avg_ns": 22675793368, "stddev_ns": 15101187, "avg_ts": 22.579151, "stddev_ts": 0.015033, "samples_ns": [ 22662849659, 22692384074, 22672146371 ], "samples_ts": [ 22.592, 22.5626, 22.5828 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T05:08:34Z", "avg_ns": 54191370697, "stddev_ns": 51985005, "avg_ts": 9.448004, "stddev_ts": 0.009059, "samples_ns": [ 54250247580, 54151804310, 54172060201 ], "samples_ts": [ 9.43774, 9.4549, 9.45137 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 379 }, { "timestamp_utc": "2025-12-09T05:12:22.705772+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:11:18Z\",\n \"avg_ns\": 5674573496,\n \"stddev_ns\": 13393830,\n \"avg_ts\": 22.556845,\n \"stddev_ts\": 0.053171,\n \"samples_ns\": [ 5689996923, 5667853307, 5665870258 ],\n \"samples_ts\": [ 22.4956, 22.5835, 22.5914 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:11:41Z\",\n \"avg_ns\": 13785577967,\n \"stddev_ns\": 14926363,\n \"avg_ts\": 9.285073,\n \"stddev_ts\": 0.010048,\n \"samples_ns\": [ 13774063052, 13802441791, 13780229058 ],\n \"samples_ts\": [ 9.29283, 9.27372, 9.28867 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:11:18Z", "avg_ns": 5674573496, "stddev_ns": 13393830, "avg_ts": 22.556845, "stddev_ts": 0.053171, "samples_ns": [ 5689996923, 5667853307, 5665870258 ], "samples_ts": [ 22.4956, 22.5835, 22.5914 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T05:11:41Z", "avg_ns": 13785577967, "stddev_ns": 14926363, "avg_ts": 9.285073, "stddev_ts": 0.010048, "samples_ns": [ 13774063052, 13802441791, 13780229058 ], "samples_ts": [ 9.29283, 9.27372, 9.28867 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 380 }, { "timestamp_utc": "2025-12-09T05:15:29.368921+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:12:23Z\",\n \"avg_ns\": 5633414452,\n \"stddev_ns\": 645101,\n \"avg_ts\": 22.721566,\n \"stddev_ts\": 0.002567,\n \"samples_ns\": [ 5633655023, 5632692933, 5633895402 ],\n \"samples_ts\": [ 22.7206, 22.7245, 22.7196 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:12:46Z\",\n \"avg_ns\": 54337853495,\n \"stddev_ns\": 26872166,\n \"avg_ts\": 9.422531,\n \"stddev_ts\": 0.004661,\n \"samples_ns\": [ 54307818293, 54359614533, 54346127660 ],\n \"samples_ts\": [ 9.42774, 9.41876, 9.42109 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:12:23Z", "avg_ns": 5633414452, "stddev_ns": 645101, "avg_ts": 22.721566, "stddev_ts": 0.002567, "samples_ns": [ 5633655023, 5632692933, 5633895402 ], "samples_ts": [ 22.7206, 22.7245, 22.7196 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T05:12:46Z", "avg_ns": 54337853495, "stddev_ns": 26872166, "avg_ts": 9.422531, "stddev_ts": 0.004661, "samples_ns": [ 54307818293, 54359614533, 54346127660 ], "samples_ts": [ 9.42774, 9.41876, 9.42109 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 381 }, { "timestamp_utc": "2025-12-09T05:17:42.218959+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:15:30Z\",\n \"avg_ns\": 22850012933,\n \"stddev_ns\": 13946588,\n \"avg_ts\": 22.406995,\n \"stddev_ts\": 0.013676,\n \"samples_ns\": [ 22851997031, 22862859568, 22835182202 ],\n \"samples_ts\": [ 22.405, 22.3944, 22.4215 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:17:01Z\",\n \"avg_ns\": 13459569265,\n \"stddev_ns\": 13368231,\n \"avg_ts\": 9.509969,\n \"stddev_ts\": 0.009451,\n \"samples_ns\": [ 13468008524, 13466543106, 13444156165 ],\n \"samples_ts\": [ 9.504, 9.50504, 9.52087 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:15:30Z", "avg_ns": 22850012933, "stddev_ns": 13946588, "avg_ts": 22.406995, "stddev_ts": 0.013676, "samples_ns": [ 22851997031, 22862859568, 22835182202 ], "samples_ts": [ 22.405, 22.3944, 22.4215 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T05:17:01Z", "avg_ns": 13459569265, "stddev_ns": 13368231, "avg_ts": 9.509969, "stddev_ts": 0.009451, "samples_ns": [ 13468008524, 13466543106, 13444156165 ], "samples_ts": [ 9.504, 9.50504, 9.52087 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 382 }, { "timestamp_utc": "2025-12-09T05:22:02.355272+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:17:43Z\",\n \"avg_ns\": 23066046656,\n \"stddev_ns\": 4355097,\n \"avg_ts\": 22.197129,\n \"stddev_ts\": 0.004186,\n \"samples_ns\": [ 23070536574, 23065751370, 23061852026 ],\n \"samples_ts\": [ 22.1928, 22.1974, 22.2012 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:19:15Z\",\n \"avg_ns\": 55578316023,\n \"stddev_ns\": 118079402,\n \"avg_ts\": 9.212254,\n \"stddev_ts\": 0.019596,\n \"samples_ns\": [ 55442094948, 55651476113, 55641377010 ],\n \"samples_ts\": [ 9.23486, 9.20012, 9.20179 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:17:43Z", "avg_ns": 23066046656, "stddev_ns": 4355097, "avg_ts": 22.197129, "stddev_ts": 0.004186, "samples_ns": [ 23070536574, 23065751370, 23061852026 ], "samples_ts": [ 22.1928, 22.1974, 22.2012 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T05:19:15Z", "avg_ns": 55578316023, "stddev_ns": 118079402, "avg_ts": 9.212254, "stddev_ts": 0.019596, "samples_ns": [ 55442094948, 55651476113, 55641377010 ], "samples_ts": [ 9.23486, 9.20012, 9.20179 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 383 }, { "timestamp_utc": "2025-12-09T05:23:07.885976+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:22:03Z\",\n \"avg_ns\": 5626351728,\n \"stddev_ns\": 14064804,\n \"avg_ts\": 22.750183,\n \"stddev_ts\": 0.056822,\n \"samples_ns\": [ 5614236365, 5623043464, 5641775357 ],\n \"samples_ts\": [ 22.7992, 22.7635, 22.6879 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:22:25Z\",\n \"avg_ns\": 13936294061,\n \"stddev_ns\": 31977288,\n \"avg_ts\": 9.184683,\n \"stddev_ts\": 0.021047,\n \"samples_ns\": [ 13916701816, 13973194189, 13918986180 ],\n \"samples_ts\": [ 9.19758, 9.1604, 9.19607 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:22:03Z", "avg_ns": 5626351728, "stddev_ns": 14064804, "avg_ts": 22.750183, "stddev_ts": 0.056822, "samples_ns": [ 5614236365, 5623043464, 5641775357 ], "samples_ts": [ 22.7992, 22.7635, 22.6879 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T05:22:25Z", "avg_ns": 13936294061, "stddev_ns": 31977288, "avg_ts": 9.184683, "stddev_ts": 0.021047, "samples_ns": [ 13916701816, 13973194189, 13918986180 ], "samples_ts": [ 9.19758, 9.1604, 9.19607 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 384 }, { "timestamp_utc": "2025-12-09T05:26:14.647644+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:23:08Z\",\n \"avg_ns\": 5694023335,\n \"stddev_ns\": 3379471,\n \"avg_ts\": 22.479716,\n \"stddev_ts\": 0.013342,\n \"samples_ns\": [ 5690267661, 5694986670, 5696815675 ],\n \"samples_ts\": [ 22.4945, 22.4759, 22.4687 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:23:31Z\",\n \"avg_ns\": 54294057381,\n \"stddev_ns\": 80585107,\n \"avg_ts\": 9.430143,\n \"stddev_ts\": 0.013995,\n \"samples_ns\": [ 54376999549, 54216057502, 54289115093 ],\n \"samples_ts\": [ 9.41575, 9.4437, 9.43099 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:23:08Z", "avg_ns": 5694023335, "stddev_ns": 3379471, "avg_ts": 22.479716, "stddev_ts": 0.013342, "samples_ns": [ 5690267661, 5694986670, 5696815675 ], "samples_ts": [ 22.4945, 22.4759, 22.4687 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T05:23:31Z", "avg_ns": 54294057381, "stddev_ns": 80585107, "avg_ts": 9.430143, "stddev_ts": 0.013995, "samples_ns": [ 54376999549, 54216057502, 54289115093 ], "samples_ts": [ 9.41575, 9.4437, 9.43099 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 385 }, { "timestamp_utc": "2025-12-09T05:28:26.655964+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:26:15Z\",\n \"avg_ns\": 22760259105,\n \"stddev_ns\": 29538764,\n \"avg_ts\": 22.495375,\n \"stddev_ts\": 0.029173,\n \"samples_ns\": [ 22745397695, 22741102789, 22794276832 ],\n \"samples_ts\": [ 22.51, 22.5143, 22.4618 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:27:46Z\",\n \"avg_ns\": 13299608361,\n \"stddev_ns\": 49988180,\n \"avg_ts\": 9.624434,\n \"stddev_ts\": 0.036126,\n \"samples_ns\": [ 13354721597, 13257196103, 13286907385 ],\n \"samples_ts\": [ 9.58463, 9.65513, 9.63354 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:26:15Z", "avg_ns": 22760259105, "stddev_ns": 29538764, "avg_ts": 22.495375, "stddev_ts": 0.029173, "samples_ns": [ 22745397695, 22741102789, 22794276832 ], "samples_ts": [ 22.51, 22.5143, 22.4618 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T05:27:46Z", "avg_ns": 13299608361, "stddev_ns": 49988180, "avg_ts": 9.624434, "stddev_ts": 0.036126, "samples_ns": [ 13354721597, 13257196103, 13286907385 ], "samples_ts": [ 9.58463, 9.65513, 9.63354 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 386 }, { "timestamp_utc": "2025-12-09T05:32:43.437042+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:28:27Z\",\n \"avg_ns\": 22602460841,\n \"stddev_ns\": 15234487,\n \"avg_ts\": 22.652408,\n \"stddev_ts\": 0.015265,\n \"samples_ns\": [ 22601182204, 22587907458, 22618292863 ],\n \"samples_ts\": [ 22.6537, 22.667, 22.6365 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:29:57Z\",\n \"avg_ns\": 55098963437,\n \"stddev_ns\": 101718167,\n \"avg_ts\": 9.292392,\n \"stddev_ts\": 0.017137,\n \"samples_ns\": [ 55216104079, 55047814910, 55032971323 ],\n \"samples_ts\": [ 9.27266, 9.301, 9.30351 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:28:27Z", "avg_ns": 22602460841, "stddev_ns": 15234487, "avg_ts": 22.652408, "stddev_ts": 0.015265, "samples_ns": [ 22601182204, 22587907458, 22618292863 ], "samples_ts": [ 22.6537, 22.667, 22.6365 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T05:29:57Z", "avg_ns": 55098963437, "stddev_ns": 101718167, "avg_ts": 9.292392, "stddev_ts": 0.017137, "samples_ns": [ 55216104079, 55047814910, 55032971323 ], "samples_ts": [ 9.27266, 9.301, 9.30351 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 387 }, { "timestamp_utc": "2025-12-09T05:33:48.614245+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:32:44Z\",\n \"avg_ns\": 5623990396,\n \"stddev_ns\": 5720103,\n \"avg_ts\": 22.759656,\n \"stddev_ts\": 0.023135,\n \"samples_ns\": [ 5619257377, 5622368520, 5630345293 ],\n \"samples_ts\": [ 22.7788, 22.7662, 22.734 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:33:06Z\",\n \"avg_ns\": 13869208494,\n \"stddev_ns\": 32168652,\n \"avg_ts\": 9.229111,\n \"stddev_ts\": 0.021399,\n \"samples_ns\": [ 13838717371, 13866082530, 13902825583 ],\n \"samples_ts\": [ 9.24941, 9.23116, 9.20676 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:32:44Z", "avg_ns": 5623990396, "stddev_ns": 5720103, "avg_ts": 22.759656, "stddev_ts": 0.023135, "samples_ns": [ 5619257377, 5622368520, 5630345293 ], "samples_ts": [ 22.7788, 22.7662, 22.734 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T05:33:06Z", "avg_ns": 13869208494, "stddev_ns": 32168652, "avg_ts": 9.229111, "stddev_ts": 0.021399, "samples_ns": [ 13838717371, 13866082530, 13902825583 ], "samples_ts": [ 9.24941, 9.23116, 9.20676 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 388 }, { "timestamp_utc": "2025-12-09T05:36:54.946700+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:33:49Z\",\n \"avg_ns\": 5623098726,\n \"stddev_ns\": 5568078,\n \"avg_ts\": 22.763264,\n \"stddev_ts\": 0.022527,\n \"samples_ns\": [ 5621371381, 5629324718, 5618600081 ],\n \"samples_ts\": [ 22.7702, 22.7381, 22.7815 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:34:11Z\",\n \"avg_ns\": 54255020272,\n \"stddev_ns\": 155444949,\n \"avg_ts\": 9.436966,\n \"stddev_ts\": 0.027049,\n \"samples_ns\": [ 54402146147, 54092414868, 54270499802 ],\n \"samples_ts\": [ 9.41139, 9.46528, 9.43422 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:33:49Z", "avg_ns": 5623098726, "stddev_ns": 5568078, "avg_ts": 22.763264, "stddev_ts": 0.022527, "samples_ns": [ 5621371381, 5629324718, 5618600081 ], "samples_ts": [ 22.7702, 22.7381, 22.7815 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T05:34:11Z", "avg_ns": 54255020272, "stddev_ns": 155444949, "avg_ts": 9.436966, "stddev_ts": 0.027049, "samples_ns": [ 54402146147, 54092414868, 54270499802 ], "samples_ts": [ 9.41139, 9.46528, 9.43422 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 389 }, { "timestamp_utc": "2025-12-09T05:39:07.433107+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:36:55Z\",\n \"avg_ns\": 22587244962,\n \"stddev_ns\": 7255240,\n \"avg_ts\": 22.667662,\n \"stddev_ts\": 0.007280,\n \"samples_ns\": [ 22582878829, 22595620054, 22583236003 ],\n \"samples_ts\": [ 22.672, 22.6593, 22.6717 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:38:26Z\",\n \"avg_ns\": 13670229513,\n \"stddev_ns\": 16998156,\n \"avg_ts\": 9.363422,\n \"stddev_ts\": 0.011651,\n \"samples_ns\": [ 13679632581, 13650607846, 13680448113 ],\n \"samples_ts\": [ 9.35698, 9.37687, 9.35642 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:36:55Z", "avg_ns": 22587244962, "stddev_ns": 7255240, "avg_ts": 22.667662, "stddev_ts": 0.00728, "samples_ns": [ 22582878829, 22595620054, 22583236003 ], "samples_ts": [ 22.672, 22.6593, 22.6717 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T05:38:26Z", "avg_ns": 13670229513, "stddev_ns": 16998156, "avg_ts": 9.363422, "stddev_ts": 0.011651, "samples_ns": [ 13679632581, 13650607846, 13680448113 ], "samples_ts": [ 9.35698, 9.37687, 9.35642 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 390 }, { "timestamp_utc": "2025-12-09T05:43:26.026610+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:39:08Z\",\n \"avg_ns\": 22555901178,\n \"stddev_ns\": 10533048,\n \"avg_ts\": 22.699163,\n \"stddev_ts\": 0.010600,\n \"samples_ns\": [ 22564262778, 22559366472, 22544074286 ],\n \"samples_ts\": [ 22.6907, 22.6957, 22.7111 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:40:38Z\",\n \"avg_ns\": 55743669100,\n \"stddev_ns\": 53903168,\n \"avg_ts\": 9.184905,\n \"stddev_ts\": 0.008885,\n \"samples_ns\": [ 55787580658, 55759915191, 55683511451 ],\n \"samples_ts\": [ 9.17767, 9.18222, 9.19482 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:39:08Z", "avg_ns": 22555901178, "stddev_ns": 10533048, "avg_ts": 22.699163, "stddev_ts": 0.0106, "samples_ns": [ 22564262778, 22559366472, 22544074286 ], "samples_ts": [ 22.6907, 22.6957, 22.7111 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T05:40:38Z", "avg_ns": 55743669100, "stddev_ns": 53903168, "avg_ts": 9.184905, "stddev_ts": 0.008885, "samples_ns": [ 55787580658, 55759915191, 55683511451 ], "samples_ts": [ 9.17767, 9.18222, 9.19482 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 391 }, { "timestamp_utc": "2025-12-09T05:44:31.678442+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:43:26Z\",\n \"avg_ns\": 5676880801,\n \"stddev_ns\": 2870510,\n \"avg_ts\": 22.547597,\n \"stddev_ts\": 0.011404,\n \"samples_ns\": [ 5678933399, 5678108379, 5673600625 ],\n \"samples_ts\": [ 22.5394, 22.5427, 22.5606 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:43:49Z\",\n \"avg_ns\": 13949854511,\n \"stddev_ns\": 30816376,\n \"avg_ts\": 9.175753,\n \"stddev_ts\": 0.020262,\n \"samples_ns\": [ 13921089747, 13946096432, 13982377355 ],\n \"samples_ts\": [ 9.19468, 9.1782, 9.15438 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:43:26Z", "avg_ns": 5676880801, "stddev_ns": 2870510, "avg_ts": 22.547597, "stddev_ts": 0.011404, "samples_ns": [ 5678933399, 5678108379, 5673600625 ], "samples_ts": [ 22.5394, 22.5427, 22.5606 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T05:43:49Z", "avg_ns": 13949854511, "stddev_ns": 30816376, "avg_ts": 9.175753, "stddev_ts": 0.020262, "samples_ns": [ 13921089747, 13946096432, 13982377355 ], "samples_ts": [ 9.19468, 9.1782, 9.15438 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 392 }, { "timestamp_utc": "2025-12-09T05:47:43.893065+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:44:32Z\",\n \"avg_ns\": 5638812923,\n \"stddev_ns\": 3133664,\n \"avg_ts\": 22.699818,\n \"stddev_ts\": 0.012615,\n \"samples_ns\": [ 5640179460, 5641030284, 5635229026 ],\n \"samples_ts\": [ 22.6943, 22.6909, 22.7142 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:44:55Z\",\n \"avg_ns\": 56184411836,\n \"stddev_ns\": 120353193,\n \"avg_ts\": 9.112876,\n \"stddev_ts\": 0.019497,\n \"samples_ns\": [ 56322865550, 56125565823, 56104804136 ],\n \"samples_ts\": [ 9.09045, 9.1224, 9.12578 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:44:32Z", "avg_ns": 5638812923, "stddev_ns": 3133664, "avg_ts": 22.699818, "stddev_ts": 0.012615, "samples_ns": [ 5640179460, 5641030284, 5635229026 ], "samples_ts": [ 22.6943, 22.6909, 22.7142 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T05:44:55Z", "avg_ns": 56184411836, "stddev_ns": 120353193, "avg_ts": 9.112876, "stddev_ts": 0.019497, "samples_ns": [ 56322865550, 56125565823, 56104804136 ], "samples_ts": [ 9.09045, 9.1224, 9.12578 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 393 }, { "timestamp_utc": "2025-12-09T05:49:57.825604+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:47:44Z\",\n \"avg_ns\": 22835260340,\n \"stddev_ns\": 10417902,\n \"avg_ts\": 22.421469,\n \"stddev_ts\": 0.010225,\n \"samples_ns\": [ 22831591690, 22827175353, 22847013979 ],\n \"samples_ts\": [ 22.4251, 22.4294, 22.4099 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:49:16Z\",\n \"avg_ns\": 13816441810,\n \"stddev_ns\": 22694535,\n \"avg_ts\": 9.264341,\n \"stddev_ts\": 0.015230,\n \"samples_ns\": [ 13832570272, 13790491304, 13826263856 ],\n \"samples_ts\": [ 9.25352, 9.28176, 9.25774 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:47:44Z", "avg_ns": 22835260340, "stddev_ns": 10417902, "avg_ts": 22.421469, "stddev_ts": 0.010225, "samples_ns": [ 22831591690, 22827175353, 22847013979 ], "samples_ts": [ 22.4251, 22.4294, 22.4099 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T05:49:16Z", "avg_ns": 13816441810, "stddev_ns": 22694535, "avg_ts": 9.264341, "stddev_ts": 0.01523, "samples_ns": [ 13832570272, 13790491304, 13826263856 ], "samples_ts": [ 9.25352, 9.28176, 9.25774 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 394 }, { "timestamp_utc": "2025-12-09T05:54:13.755542+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:49:58Z\",\n \"avg_ns\": 23074927326,\n \"stddev_ns\": 14434082,\n \"avg_ts\": 22.188591,\n \"stddev_ts\": 0.013881,\n \"samples_ns\": [ 23088140051, 23077117985, 23059523943 ],\n \"samples_ts\": [ 22.1759, 22.1865, 22.2034 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:51:31Z\",\n \"avg_ns\": 54170707927,\n \"stddev_ns\": 16471228,\n \"avg_ts\": 9.451603,\n \"stddev_ts\": 0.002874,\n \"samples_ns\": [ 54153184837, 54185868940, 54173070005 ],\n \"samples_ts\": [ 9.45466, 9.44896, 9.45119 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:49:58Z", "avg_ns": 23074927326, "stddev_ns": 14434082, "avg_ts": 22.188591, "stddev_ts": 0.013881, "samples_ns": [ 23088140051, 23077117985, 23059523943 ], "samples_ts": [ 22.1759, 22.1865, 22.2034 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T05:51:31Z", "avg_ns": 54170707927, "stddev_ns": 16471228, "avg_ts": 9.451603, "stddev_ts": 0.002874, "samples_ns": [ 54153184837, 54185868940, 54173070005 ], "samples_ts": [ 9.45466, 9.44896, 9.45119 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 395 }, { "timestamp_utc": "2025-12-09T05:54:48.376251+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:54:14Z\",\n \"avg_ns\": 2883255397,\n \"stddev_ns\": 5702666,\n \"avg_ts\": 44.394379,\n \"stddev_ts\": 0.087838,\n \"samples_ns\": [ 2888568957, 2883966845, 2877230389 ],\n \"samples_ts\": [ 44.3126, 44.3833, 44.4872 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:54:26Z\",\n \"avg_ns\": 7352639988,\n \"stddev_ns\": 19422153,\n \"avg_ts\": 17.408794,\n \"stddev_ts\": 0.045989,\n \"samples_ns\": [ 7352945554, 7333066854, 7371907556 ],\n \"samples_ts\": [ 17.408, 17.4552, 17.3632 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:54:14Z", "avg_ns": 2883255397, "stddev_ns": 5702666, "avg_ts": 44.394379, "stddev_ts": 0.087838, "samples_ns": [ 2888568957, 2883966845, 2877230389 ], "samples_ts": [ 44.3126, 44.3833, 44.4872 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T05:54:26Z", "avg_ns": 7352639988, "stddev_ns": 19422153, "avg_ts": 17.408794, "stddev_ts": 0.045989, "samples_ns": [ 7352945554, 7333066854, 7371907556 ], "samples_ts": [ 17.408, 17.4552, 17.3632 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 396 }, { "timestamp_utc": "2025-12-09T05:56:34.318488+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:54:49Z\",\n \"avg_ns\": 3259899640,\n \"stddev_ns\": 444547032,\n \"avg_ts\": 39.736575,\n \"stddev_ts\": 5.197966,\n \"samples_ns\": [ 2882896965, 3146699467, 3750102488 ],\n \"samples_ts\": [ 44.3998, 40.6775, 34.1324 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:55:01Z\",\n \"avg_ns\": 30734464495,\n \"stddev_ns\": 1227690565,\n \"avg_ts\": 16.676279,\n \"stddev_ts\": 0.655606,\n \"samples_ns\": [ 32104939810, 29735264765, 30363188910 ],\n \"samples_ts\": [ 15.9477, 17.2186, 16.8625 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:54:49Z", "avg_ns": 3259899640, "stddev_ns": 444547032, "avg_ts": 39.736575, "stddev_ts": 5.197966, "samples_ns": [ 2882896965, 3146699467, 3750102488 ], "samples_ts": [ 44.3998, 40.6775, 34.1324 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T05:55:01Z", "avg_ns": 30734464495, "stddev_ns": 1227690565, "avg_ts": 16.676279, "stddev_ts": 0.655606, "samples_ns": [ 32104939810, 29735264765, 30363188910 ], "samples_ts": [ 15.9477, 17.2186, 16.8625 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 397 }, { "timestamp_utc": "2025-12-09T06:00:15.109138+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:56:35Z\",\n \"avg_ns\": 40940834623,\n \"stddev_ns\": 141230461,\n \"avg_ts\": 12.505951,\n \"stddev_ts\": 0.043058,\n \"samples_ns\": [ 40872278344, 41103257329, 40846968197 ],\n \"samples_ts\": [ 12.5268, 12.4564, 12.5346 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:59:11Z\",\n \"avg_ns\": 21052041564,\n \"stddev_ns\": 2734293586,\n \"avg_ts\": 8.701940,\n \"stddev_ts\": 7.089982,\n \"samples_ns\": [ 28474794652, 27101844373, 7579485669 ],\n \"samples_ts\": [ 4.4952, 4.72293, 16.8877 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T05:56:35Z", "avg_ns": 40940834623, "stddev_ns": 141230461, "avg_ts": 12.505951, "stddev_ts": 0.043058, "samples_ns": [ 40872278344, 41103257329, 40846968197 ], "samples_ts": [ 12.5268, 12.4564, 12.5346 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T05:59:11Z", "avg_ns": 21052041564, "stddev_ns": 2734293586, "avg_ts": 8.70194, "stddev_ts": 7.089982, "samples_ns": [ 28474794652, 27101844373, 7579485669 ], "samples_ts": [ 4.4952, 4.72293, 16.8877 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 398 }, { "timestamp_utc": "2025-12-09T06:05:26.731507+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:00:16Z\",\n \"avg_ns\": 11602433222,\n \"stddev_ns\": 6155895,\n \"avg_ts\": 44.128683,\n \"stddev_ts\": 0.023407,\n \"samples_ns\": [ 11596176257, 11608478832, 11602644579 ],\n \"samples_ts\": [ 44.1525, 44.1057, 44.1279 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:01:02Z\",\n \"avg_ns\": 88055571909,\n \"stddev_ns\": 1296867326,\n \"avg_ts\": 6.215235,\n \"stddev_ts\": 2.000845,\n \"samples_ns\": [ 88506057247, 114680891262, 60979767219 ],\n \"samples_ts\": [ 5.78491, 4.46456, 8.39623 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T06:00:16Z", "avg_ns": 11602433222, "stddev_ns": 6155895, "avg_ts": 44.128683, "stddev_ts": 0.023407, "samples_ns": [ 11596176257, 11608478832, 11602644579 ], "samples_ts": [ 44.1525, 44.1057, 44.1279 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T06:01:02Z", "avg_ns": 88055571909, "stddev_ns": 1296867326, "avg_ts": 6.215235, "stddev_ts": 2.000845, "samples_ns": [ 88506057247, 114680891262, 60979767219 ], "samples_ts": [ 5.78491, 4.46456, 8.39623 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 399 }, { "timestamp_utc": "2025-12-09T06:06:01.215623+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:05:27Z\",\n \"avg_ns\": 2871898505,\n \"stddev_ns\": 4144878,\n \"avg_ts\": 44.569882,\n \"stddev_ts\": 0.064316,\n \"samples_ns\": [ 2871620151, 2876175545, 2867899819 ],\n \"samples_ts\": [ 44.5741, 44.5035, 44.632 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:05:39Z\",\n \"avg_ns\": 7316979933,\n \"stddev_ns\": 23797464,\n \"avg_ts\": 17.493680,\n \"stddev_ts\": 0.056934,\n \"samples_ns\": [ 7291670446, 7320367689, 7338901666 ],\n \"samples_ts\": [ 17.5543, 17.4855, 17.4413 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T06:05:27Z", "avg_ns": 2871898505, "stddev_ns": 4144878, "avg_ts": 44.569882, "stddev_ts": 0.064316, "samples_ns": [ 2871620151, 2876175545, 2867899819 ], "samples_ts": [ 44.5741, 44.5035, 44.632 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T06:05:39Z", "avg_ns": 7316979933, "stddev_ns": 23797464, "avg_ts": 17.49368, "stddev_ts": 0.056934, "samples_ns": [ 7291670446, 7320367689, 7338901666 ], "samples_ts": [ 17.5543, 17.4855, 17.4413 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 400 }, { "timestamp_utc": "2025-12-09T06:10:35.991558+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:06:02Z\",\n \"avg_ns\": 4122652119,\n \"stddev_ns\": 1211337054,\n \"avg_ts\": 32.751606,\n \"stddev_ts\": 8.774919,\n \"samples_ns\": [ 3139963666, 3751975463, 5476017230 ],\n \"samples_ts\": [ 40.7648, 34.1154, 23.3747 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:06:17Z\",\n \"avg_ns\": 86082915272,\n \"stddev_ns\": 2470454459,\n \"avg_ts\": 8.277975,\n \"stddev_ts\": 6.481674,\n \"samples_ns\": [ 113515071663, 112251125043, 32482549111 ],\n \"samples_ts\": [ 4.51041, 4.5612, 15.7623 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T06:06:02Z", "avg_ns": 4122652119, "stddev_ns": 1211337054, "avg_ts": 32.751606, "stddev_ts": 8.774919, "samples_ns": [ 3139963666, 3751975463, 5476017230 ], "samples_ts": [ 40.7648, 34.1154, 23.3747 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T06:06:17Z", "avg_ns": 86082915272, "stddev_ns": 2470454459, "avg_ts": 8.277975, "stddev_ts": 6.481674, "samples_ns": [ 113515071663, 112251125043, 32482549111 ], "samples_ts": [ 4.51041, 4.5612, 15.7623 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 401 }, { "timestamp_utc": "2025-12-09T06:13:15.069563+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:10:36Z\",\n \"avg_ns\": 20610458940,\n \"stddev_ns\": 1816513687,\n \"avg_ts\": 32.392718,\n \"stddev_ts\": 16.362703,\n \"samples_ns\": [ 11506491851, 13160265841, 37164619129 ],\n \"samples_ts\": [ 44.4966, 38.905, 13.7765 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:11:50Z\",\n \"avg_ns\": 28126029231,\n \"stddev_ns\": 91209794,\n \"avg_ts\": 4.550976,\n \"stddev_ts\": 0.014764,\n \"samples_ns\": [ 28031619962, 28213660809, 28132806924 ],\n \"samples_ts\": [ 4.56627, 4.53681, 4.54985 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T06:10:36Z", "avg_ns": 20610458940, "stddev_ns": 1816513687, "avg_ts": 32.392718, "stddev_ts": 16.362703, "samples_ns": [ 11506491851, 13160265841, 37164619129 ], "samples_ts": [ 44.4966, 38.905, 13.7765 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T06:11:50Z", "avg_ns": 28126029231, "stddev_ns": 91209794, "avg_ts": 4.550976, "stddev_ts": 0.014764, "samples_ns": [ 28031619962, 28213660809, 28132806924 ], "samples_ts": [ 4.56627, 4.53681, 4.54985 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 402 }, { "timestamp_utc": "2025-12-09T06:18:37.944943+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:13:16Z\",\n \"avg_ns\": 28189729075,\n \"stddev_ns\": 1928625124,\n \"avg_ts\": 24.301697,\n \"stddev_ts\": 17.593082,\n \"samples_ns\": [ 40545644820, 32527427783, 11496114623 ],\n \"samples_ts\": [ 12.6277, 15.7406, 44.5368 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:15:21Z\",\n \"avg_ns\": 65352010894,\n \"stddev_ns\": 3778172106,\n \"avg_ts\": 10.429055,\n \"stddev_ts\": 6.404579,\n \"samples_ns\": [ 29665361267, 54067066993, 112323604423 ],\n \"samples_ts\": [ 17.2592, 9.46972, 4.55826 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T06:13:16Z", "avg_ns": 28189729075, "stddev_ns": 1928625124, "avg_ts": 24.301697, "stddev_ts": 17.593082, "samples_ns": [ 40545644820, 32527427783, 11496114623 ], "samples_ts": [ 12.6277, 15.7406, 44.5368 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T06:15:21Z", "avg_ns": 65352010894, "stddev_ns": 3778172106, "avg_ts": 10.429055, "stddev_ts": 6.404579, "samples_ns": [ 29665361267, 54067066993, 112323604423 ], "samples_ts": [ 17.2592, 9.46972, 4.55826 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 403 }, { "timestamp_utc": "2025-12-09T06:20:16.866030+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:18:39Z\",\n \"avg_ns\": 10205395574,\n \"stddev_ns\": 62110548,\n \"avg_ts\": 12.542695,\n \"stddev_ts\": 0.076380,\n \"samples_ns\": [ 10209236922, 10265496294, 10141453506 ],\n \"samples_ts\": [ 12.5377, 12.469, 12.6215 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:19:20Z\",\n \"avg_ns\": 18702787720,\n \"stddev_ns\": 4066184215,\n \"avg_ts\": 9.422791,\n \"stddev_ts\": 7.108428,\n \"samples_ns\": [ 27711485199, 21119428920, 7277449041 ],\n \"samples_ts\": [ 4.61902, 6.06077, 17.5886 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T06:18:39Z", "avg_ns": 10205395574, "stddev_ns": 62110548, "avg_ts": 12.542695, "stddev_ts": 0.07638, "samples_ns": [ 10209236922, 10265496294, 10141453506 ], "samples_ts": [ 12.5377, 12.469, 12.6215 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T06:19:20Z", "avg_ns": 18702787720, "stddev_ns": 4066184215, "avg_ts": 9.422791, "stddev_ts": 7.108428, "samples_ns": [ 27711485199, 21119428920, 7277449041 ], "samples_ts": [ 4.61902, 6.06077, 17.5886 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 404 }, { "timestamp_utc": "2025-12-09T06:24:06.865596+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:20:17Z\",\n \"avg_ns\": 2920993486,\n \"stddev_ns\": 3545928,\n \"avg_ts\": 43.820750,\n \"stddev_ts\": 0.053159,\n \"samples_ns\": [ 2925085433, 2918822655, 2919072370 ],\n \"samples_ts\": [ 43.7594, 43.8533, 43.8495 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:20:29Z\",\n \"avg_ns\": 72395097171,\n \"stddev_ns\": 1867162442,\n \"avg_ts\": 9.514526,\n \"stddev_ts\": 6.758249,\n \"samples_ns\": [ 29729751574, 76187306810, 111268233131 ],\n \"samples_ts\": [ 17.2218, 6.72028, 4.60149 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T06:20:17Z", "avg_ns": 2920993486, "stddev_ns": 3545928, "avg_ts": 43.82075, "stddev_ts": 0.053159, "samples_ns": [ 2925085433, 2918822655, 2919072370 ], "samples_ts": [ 43.7594, 43.8533, 43.8495 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T06:20:29Z", "avg_ns": 72395097171, "stddev_ns": 1867162442, "avg_ts": 9.514526, "stddev_ts": 6.758249, "samples_ns": [ 29729751574, 76187306810, 111268233131 ], "samples_ts": [ 17.2218, 6.72028, 4.60149 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 405 }, { "timestamp_utc": "2025-12-09T06:25:59.108130+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:24:08Z\",\n \"avg_ns\": 15904898938,\n \"stddev_ns\": 451823041,\n \"avg_ts\": 36.383130,\n \"stddev_ts\": 13.418433,\n \"samples_ns\": [ 24510661216, 11606256891, 11597778709 ],\n \"samples_ts\": [ 20.8889, 44.1141, 44.1464 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:25:36Z\",\n \"avg_ns\": 7394265286,\n \"stddev_ns\": 15662411,\n \"avg_ts\": 17.310764,\n \"stddev_ts\": 0.036711,\n \"samples_ns\": [ 7404424581, 7402143081, 7376228197 ],\n \"samples_ts\": [ 17.287, 17.2923, 17.353 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T06:24:08Z", "avg_ns": 15904898938, "stddev_ns": 451823041, "avg_ts": 36.38313, "stddev_ts": 13.418433, "samples_ns": [ 24510661216, 11606256891, 11597778709 ], "samples_ts": [ 20.8889, 44.1141, 44.1464 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T06:25:36Z", "avg_ns": 7394265286, "stddev_ns": 15662411, "avg_ts": 17.310764, "stddev_ts": 0.036711, "samples_ns": [ 7404424581, 7402143081, 7376228197 ], "samples_ts": [ 17.287, 17.2923, 17.353 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 406 }, { "timestamp_utc": "2025-12-09T06:31:06.662052+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:25:59Z\",\n \"avg_ns\": 39478800835,\n \"stddev_ns\": 1844651962,\n \"avg_ts\": 12.988414,\n \"stddev_ts\": 0.623695,\n \"samples_ns\": [ 37349016953, 40571192687, 40516192867 ],\n \"samples_ts\": [ 13.7085, 12.6198, 12.6369 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:28:11Z\",\n \"avg_ns\": 58148490410,\n \"stddev_ns\": 4018641954,\n \"avg_ts\": 12.608545,\n \"stddev_ts\": 7.002434,\n \"samples_ns\": [ 112892617453, 31494710816, 30058142961 ],\n \"samples_ts\": [ 4.53528, 16.2567, 17.0337 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T06:25:59Z", "avg_ns": 39478800835, "stddev_ns": 1844651962, "avg_ts": 12.988414, "stddev_ts": 0.623695, "samples_ns": [ 37349016953, 40571192687, 40516192867 ], "samples_ts": [ 13.7085, 12.6198, 12.6369 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T06:28:11Z", "avg_ns": 58148490410, "stddev_ns": 4018641954, "avg_ts": 12.608545, "stddev_ts": 7.002434, "samples_ns": [ 112892617453, 31494710816, 30058142961 ], "samples_ts": [ 4.53528, 16.2567, 17.0337 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 407 }, { "timestamp_utc": "2025-12-09T06:32:59.804880+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:31:07Z\",\n \"avg_ns\": 8213943819,\n \"stddev_ns\": 4129847776,\n \"avg_ts\": 17.228464,\n \"stddev_ts\": 7.253413,\n \"samples_ns\": [ 5001800943, 9513626282, 10126404234 ],\n \"samples_ts\": [ 25.5908, 13.4544, 12.6402 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:31:35Z\",\n \"avg_ns\": 27812330358,\n \"stddev_ns\": 311125352,\n \"avg_ts\": 4.602661,\n \"stddev_ts\": 0.051658,\n \"samples_ns\": [ 28087383921, 27474658284, 27874948869 ],\n \"samples_ts\": [ 4.55721, 4.65884, 4.59194 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T06:31:07Z", "avg_ns": 8213943819, "stddev_ns": 4129847776, "avg_ts": 17.228464, "stddev_ts": 7.253413, "samples_ns": [ 5001800943, 9513626282, 10126404234 ], "samples_ts": [ 25.5908, 13.4544, 12.6402 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T06:31:35Z", "avg_ns": 27812330358, "stddev_ns": 311125352, "avg_ts": 4.602661, "stddev_ts": 0.051658, "samples_ns": [ 28087383921, 27474658284, 27874948869 ], "samples_ts": [ 4.55721, 4.65884, 4.59194 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 408 }, { "timestamp_utc": "2025-12-09T06:36:28.679154+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:33:01Z\",\n \"avg_ns\": 10216413873,\n \"stddev_ns\": 30231237,\n \"avg_ts\": 12.528931,\n \"stddev_ts\": 0.037133,\n \"samples_ns\": [ 10181776732, 10237491312, 10229973575 ],\n \"samples_ts\": [ 12.5715, 12.5031, 12.5123 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:33:42Z\",\n \"avg_ns\": 55236326888,\n \"stddev_ns\": 772752475,\n \"avg_ts\": 11.644460,\n \"stddev_ts\": 5.915739,\n \"samples_ns\": [ 94325905110, 29759217583, 41623857972 ],\n \"samples_ts\": [ 5.42799, 17.2048, 12.3006 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T06:33:01Z", "avg_ns": 10216413873, "stddev_ns": 30231237, "avg_ts": 12.528931, "stddev_ts": 0.037133, "samples_ns": [ 10181776732, 10237491312, 10229973575 ], "samples_ts": [ 12.5715, 12.5031, 12.5123 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T06:33:42Z", "avg_ns": 55236326888, "stddev_ns": 772752475, "avg_ts": 11.64446, "stddev_ts": 5.915739, "samples_ns": [ 94325905110, 29759217583, 41623857972 ], "samples_ts": [ 5.42799, 17.2048, 12.3006 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 409 }, { "timestamp_utc": "2025-12-09T06:40:15.628259+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:36:30Z\",\n \"avg_ns\": 41207787974,\n \"stddev_ns\": 89841550,\n \"avg_ts\": 12.424875,\n \"stddev_ts\": 0.027120,\n \"samples_ns\": [ 41105017458, 41246918846, 41271427618 ],\n \"samples_ts\": [ 12.4559, 12.413, 12.4057 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:39:15Z\",\n \"avg_ns\": 20014672879,\n \"stddev_ns\": 1464175909,\n \"avg_ts\": 9.047843,\n \"stddev_ts\": 7.237191,\n \"samples_ns\": [ 27713485190, 24974004676, 7356528772 ],\n \"samples_ts\": [ 4.61869, 5.12533, 17.3995 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T06:36:30Z", "avg_ns": 41207787974, "stddev_ns": 89841550, "avg_ts": 12.424875, "stddev_ts": 0.02712, "samples_ns": [ 41105017458, 41246918846, 41271427618 ], "samples_ts": [ 12.4559, 12.413, 12.4057 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T06:39:15Z", "avg_ns": 20014672879, "stddev_ns": 1464175909, "avg_ts": 9.047843, "stddev_ts": 7.237191, "samples_ns": [ 27713485190, 24974004676, 7356528772 ], "samples_ts": [ 4.61869, 5.12533, 17.3995 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 410 }, { "timestamp_utc": "2025-12-09T06:45:24.908041+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:40:16Z\",\n \"avg_ns\": 11610334195,\n \"stddev_ns\": 7602695,\n \"avg_ts\": 44.098657,\n \"stddev_ts\": 0.028876,\n \"samples_ns\": [ 11618097695, 11602903216, 11610001674 ],\n \"samples_ts\": [ 44.0692, 44.1269, 44.0999 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:41:02Z\",\n \"avg_ns\": 87268438617,\n \"stddev_ns\": 3841957043,\n \"avg_ts\": 6.198187,\n \"stddev_ts\": 1.786462,\n \"samples_ns\": [ 86233812869, 112367321952, 63204181032 ],\n \"samples_ts\": [ 5.93735, 4.55648, 8.10073 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T06:40:16Z", "avg_ns": 11610334195, "stddev_ns": 7602695, "avg_ts": 44.098657, "stddev_ts": 0.028876, "samples_ns": [ 11618097695, 11602903216, 11610001674 ], "samples_ts": [ 44.0692, 44.1269, 44.0999 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T06:41:02Z", "avg_ns": 87268438617, "stddev_ns": 3841957043, "avg_ts": 6.198187, "stddev_ts": 1.786462, "samples_ns": [ 86233812869, 112367321952, 63204181032 ], "samples_ts": [ 5.93735, 4.55648, 8.10073 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 411 }, { "timestamp_utc": "2025-12-09T06:45:59.461615+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:45:25Z\",\n \"avg_ns\": 2873007048,\n \"stddev_ns\": 1304166,\n \"avg_ts\": 44.552629,\n \"stddev_ts\": 0.020204,\n \"samples_ns\": [ 2872685488, 2871894869, 2874440788 ],\n \"samples_ts\": [ 44.5576, 44.5699, 44.5304 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:45:37Z\",\n \"avg_ns\": 7342721631,\n \"stddev_ns\": 30741612,\n \"avg_ts\": 17.432432,\n \"stddev_ts\": 0.073045,\n \"samples_ns\": [ 7346861933, 7371183030, 7310119932 ],\n \"samples_ts\": [ 17.4224, 17.3649, 17.51 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T06:45:25Z", "avg_ns": 2873007048, "stddev_ns": 1304166, "avg_ts": 44.552629, "stddev_ts": 0.020204, "samples_ns": [ 2872685488, 2871894869, 2874440788 ], "samples_ts": [ 44.5576, 44.5699, 44.5304 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T06:45:37Z", "avg_ns": 7342721631, "stddev_ns": 30741612, "avg_ts": 17.432432, "stddev_ts": 0.073045, "samples_ns": [ 7346861933, 7371183030, 7310119932 ], "samples_ts": [ 17.4224, 17.3649, 17.51 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 412 }, { "timestamp_utc": "2025-12-09T06:50:33.723279+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:46:00Z\",\n \"avg_ns\": 3515636988,\n \"stddev_ns\": 686716071,\n \"avg_ts\": 37.339961,\n \"stddev_ts\": 7.195355,\n \"samples_ns\": [ 2870109709, 3439590074, 4237211182 ],\n \"samples_ts\": [ 44.5976, 37.2137, 30.2085 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:46:13Z\",\n \"avg_ns\": 86581877261,\n \"stddev_ns\": 2156113600,\n \"avg_ts\": 7.509895,\n \"stddev_ts\": 4.986786,\n \"samples_ns\": [ 109165838819, 111989936087, 38589856877 ],\n \"samples_ts\": [ 4.69011, 4.57184, 13.2677 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T06:46:00Z", "avg_ns": 3515636988, "stddev_ns": 686716071, "avg_ts": 37.339961, "stddev_ts": 7.195355, "samples_ns": [ 2870109709, 3439590074, 4237211182 ], "samples_ts": [ 44.5976, 37.2137, 30.2085 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T06:46:13Z", "avg_ns": 86581877261, "stddev_ns": 2156113600, "avg_ts": 7.509895, "stddev_ts": 4.986786, "samples_ns": [ 109165838819, 111989936087, 38589856877 ], "samples_ts": [ 4.69011, 4.57184, 13.2677 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 413 }, { "timestamp_utc": "2025-12-09T06:53:06.541736+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:50:34Z\",\n \"avg_ns\": 18578381672,\n \"stddev_ns\": 4150273218,\n \"avg_ts\": 34.213090,\n \"stddev_ts\": 15.849277,\n \"samples_ns\": [ 11533323837, 12105713027, 32096108153 ],\n \"samples_ts\": [ 44.3931, 42.2941, 15.9521 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:51:41Z\",\n \"avg_ns\": 28091945930,\n \"stddev_ns\": 320259659,\n \"avg_ts\": 4.556862,\n \"stddev_ts\": 0.052046,\n \"samples_ns\": [ 28393926192, 27756098583, 28125813015 ],\n \"samples_ts\": [ 4.50801, 4.6116, 4.55098 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T06:50:34Z", "avg_ns": 18578381672, "stddev_ns": 4150273218, "avg_ts": 34.21309, "stddev_ts": 15.849277, "samples_ns": [ 11533323837, 12105713027, 32096108153 ], "samples_ts": [ 44.3931, 42.2941, 15.9521 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T06:51:41Z", "avg_ns": 28091945930, "stddev_ns": 320259659, "avg_ts": 4.556862, "stddev_ts": 0.052046, "samples_ns": [ 28393926192, 27756098583, 28125813015 ], "samples_ts": [ 4.50801, 4.6116, 4.55098 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 414 }, { "timestamp_utc": "2025-12-09T06:58:29.843445+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:53:08Z\",\n \"avg_ns\": 30204718373,\n \"stddev_ns\": 2183368718,\n \"avg_ts\": 23.463347,\n \"stddev_ts\": 18.208521,\n \"samples_ns\": [ 40491956110, 38612891666, 11509307344 ],\n \"samples_ts\": [ 12.6445, 13.2598, 44.4857 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:55:19Z\",\n \"avg_ns\": 63404550032,\n \"stddev_ns\": 3955404398,\n \"avg_ts\": 10.790338,\n \"stddev_ts\": 6.319767,\n \"samples_ns\": [ 29767761519, 48270356801, 112175531777 ],\n \"samples_ts\": [ 17.1998, 10.6069, 4.56428 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T06:53:08Z", "avg_ns": 30204718373, "stddev_ns": 2183368718, "avg_ts": 23.463347, "stddev_ts": 18.208521, "samples_ns": [ 40491956110, 38612891666, 11509307344 ], "samples_ts": [ 12.6445, 13.2598, 44.4857 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T06:55:19Z", "avg_ns": 63404550032, "stddev_ns": 3955404398, "avg_ts": 10.790338, "stddev_ts": 6.319767, "samples_ns": [ 29767761519, 48270356801, 112175531777 ], "samples_ts": [ 17.1998, 10.6069, 4.56428 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 415 }, { "timestamp_utc": "2025-12-09T07:00:14.865705+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:58:31Z\",\n \"avg_ns\": 10193073651,\n \"stddev_ns\": 44933482,\n \"avg_ts\": 12.557709,\n \"stddev_ts\": 0.055249,\n \"samples_ns\": [ 10243570012, 10178149997, 10157500945 ],\n \"samples_ts\": [ 12.4956, 12.576, 12.6015 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:59:12Z\",\n \"avg_ns\": 20758438533,\n \"stddev_ns\": 2265934488,\n \"avg_ts\": 8.887257,\n \"stddev_ts\": 7.314085,\n \"samples_ns\": [ 27797295672, 27093071829, 7384948099 ],\n \"samples_ts\": [ 4.60476, 4.72446, 17.3326 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T06:58:31Z", "avg_ns": 10193073651, "stddev_ns": 44933482, "avg_ts": 12.557709, "stddev_ts": 0.055249, "samples_ns": [ 10243570012, 10178149997, 10157500945 ], "samples_ts": [ 12.4956, 12.576, 12.6015 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T06:59:12Z", "avg_ns": 20758438533, "stddev_ns": 2265934488, "avg_ts": 8.887257, "stddev_ts": 7.314085, "samples_ns": [ 27797295672, 27093071829, 7384948099 ], "samples_ts": [ 4.60476, 4.72446, 17.3326 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 416 }, { "timestamp_utc": "2025-12-09T07:03:59.668246+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:00:15Z\",\n \"avg_ns\": 2884652269,\n \"stddev_ns\": 5727586,\n \"avg_ts\": 44.372883,\n \"stddev_ts\": 0.088204,\n \"samples_ns\": [ 2878044643, 2888200435, 2887711729 ],\n \"samples_ts\": [ 44.4746, 44.3183, 44.3258 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:00:27Z\",\n \"avg_ns\": 70721140855,\n \"stddev_ns\": 4210915805,\n \"avg_ts\": 9.685576,\n \"stddev_ts\": 6.640001,\n \"samples_ns\": [ 29792655753, 69989788932, 112380977880 ],\n \"samples_ts\": [ 17.1854, 7.31535, 4.55593 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T07:00:15Z", "avg_ns": 2884652269, "stddev_ns": 5727586, "avg_ts": 44.372883, "stddev_ts": 0.088204, "samples_ns": [ 2878044643, 2888200435, 2887711729 ], "samples_ts": [ 44.4746, 44.3183, 44.3258 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T07:00:27Z", "avg_ns": 70721140855, "stddev_ns": 4210915805, "avg_ts": 9.685576, "stddev_ts": 6.640001, "samples_ns": [ 29792655753, 69989788932, 112380977880 ], "samples_ts": [ 17.1854, 7.31535, 4.55593 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 417 }, { "timestamp_utc": "2025-12-09T07:05:57.275575+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:04:01Z\",\n \"avg_ns\": 17628087376,\n \"stddev_ns\": 3939361678,\n \"avg_ts\": 35.099825,\n \"stddev_ts\": 15.424697,\n \"samples_ns\": [ 29614316109, 11629373805, 11640572216 ],\n \"samples_ts\": [ 17.2889, 44.0264, 43.9841 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:05:34Z\",\n \"avg_ns\": 7419055616,\n \"stddev_ns\": 18634463,\n \"avg_ts\": 17.252942,\n \"stddev_ts\": 0.043348,\n \"samples_ns\": [ 7436792227, 7420736619, 7399638004 ],\n \"samples_ts\": [ 17.2117, 17.249, 17.2981 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T07:04:01Z", "avg_ns": 17628087376, "stddev_ns": 3939361678, "avg_ts": 35.099825, "stddev_ts": 15.424697, "samples_ns": [ 29614316109, 11629373805, 11640572216 ], "samples_ts": [ 17.2889, 44.0264, 43.9841 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T07:05:34Z", "avg_ns": 7419055616, "stddev_ns": 18634463, "avg_ts": 17.252942, "stddev_ts": 0.043348, "samples_ns": [ 7436792227, 7420736619, 7399638004 ], "samples_ts": [ 17.2117, 17.249, 17.2981 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 418 }, { "timestamp_utc": "2025-12-09T07:11:04.154707+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:05:58Z\",\n \"avg_ns\": 38088396475,\n \"stddev_ns\": 4019918701,\n \"avg_ts\": 13.549558,\n \"stddev_ts\": 1.522768,\n \"samples_ns\": [ 33447035511, 40464507904, 40353646012 ],\n \"samples_ts\": [ 15.3078, 12.6531, 12.6878 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:08:04Z\",\n \"avg_ns\": 59650427554,\n \"stddev_ns\": 958792515,\n \"avg_ts\": 11.873129,\n \"stddev_ts\": 6.553584,\n \"samples_ns\": [ 112213500317, 37008108076, 29729674270 ],\n \"samples_ts\": [ 4.56273, 13.8348, 17.2219 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T07:05:58Z", "avg_ns": 38088396475, "stddev_ns": 4019918701, "avg_ts": 13.549558, "stddev_ts": 1.522768, "samples_ns": [ 33447035511, 40464507904, 40353646012 ], "samples_ts": [ 15.3078, 12.6531, 12.6878 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T07:08:04Z", "avg_ns": 59650427554, "stddev_ns": 958792515, "avg_ts": 11.873129, "stddev_ts": 6.553584, "samples_ns": [ 112213500317, 37008108076, 29729674270 ], "samples_ts": [ 4.56273, 13.8348, 17.2219 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 419 }, { "timestamp_utc": "2025-12-09T07:12:52.004956+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:11:05Z\",\n \"avg_ns\": 6446922013,\n \"stddev_ns\": 3429637782,\n \"avg_ts\": 23.591416,\n \"stddev_ts\": 10.868190,\n \"samples_ns\": [ 3750690866, 5283032706, 10307042467 ],\n \"samples_ts\": [ 34.127, 24.2285, 12.4187 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:11:27Z\",\n \"avg_ns\": 28018136236,\n \"stddev_ns\": 179749730,\n \"avg_ts\": 4.568595,\n \"stddev_ts\": 0.029264,\n \"samples_ns\": [ 27855622639, 27987581106, 28211204965 ],\n \"samples_ts\": [ 4.59512, 4.57346, 4.5372 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T07:11:05Z", "avg_ns": 6446922013, "stddev_ns": 3429637782, "avg_ts": 23.591416, "stddev_ts": 10.86819, "samples_ns": [ 3750690866, 5283032706, 10307042467 ], "samples_ts": [ 34.127, 24.2285, 12.4187 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T07:11:27Z", "avg_ns": 28018136236, "stddev_ns": 179749730, "avg_ts": 4.568595, "stddev_ts": 0.029264, "samples_ns": [ 27855622639, 27987581106, 28211204965 ], "samples_ts": [ 4.59512, 4.57346, 4.5372 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 420 }, { "timestamp_utc": "2025-12-09T07:16:14.040939+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:12:53Z\",\n \"avg_ns\": 10262880463,\n \"stddev_ns\": 20786433,\n \"avg_ts\": 12.472166,\n \"stddev_ts\": 0.025272,\n \"samples_ns\": [ 10265941938, 10281965881, 10240733572 ],\n \"samples_ts\": [ 12.4684, 12.449, 12.4991 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:13:34Z\",\n \"avg_ns\": 52999698263,\n \"stddev_ns\": 4084973456,\n \"avg_ts\": 12.879699,\n \"stddev_ts\": 6.637390,\n \"samples_ns\": [ 97590075269, 29608953819, 31800065703 ],\n \"samples_ts\": [ 5.24644, 17.2921, 16.1006 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T07:12:53Z", "avg_ns": 10262880463, "stddev_ns": 20786433, "avg_ts": 12.472166, "stddev_ts": 0.025272, "samples_ns": [ 10265941938, 10281965881, 10240733572 ], "samples_ts": [ 12.4684, 12.449, 12.4991 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T07:13:34Z", "avg_ns": 52999698263, "stddev_ns": 4084973456, "avg_ts": 12.879699, "stddev_ts": 6.63739, "samples_ns": [ 97590075269, 29608953819, 31800065703 ], "samples_ts": [ 5.24644, 17.2921, 16.1006 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 421 }, { "timestamp_utc": "2025-12-09T07:20:11.112482+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:16:14Z\",\n \"avg_ns\": 41041521055,\n \"stddev_ns\": 66534072,\n \"avg_ts\": 12.475193,\n \"stddev_ts\": 0.020207,\n \"samples_ns\": [ 41117604997, 40994250481, 41012707688 ],\n \"samples_ts\": [ 12.4521, 12.4896, 12.4839 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:18:57Z\",\n \"avg_ns\": 24442348395,\n \"stddev_ns\": 1590458200,\n \"avg_ts\": 5.521992,\n \"stddev_ts\": 1.665273,\n \"samples_ns\": [ 27962275121, 28171522695, 17193247371 ],\n \"samples_ts\": [ 4.5776, 4.5436, 7.44478 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T07:16:14Z", "avg_ns": 41041521055, "stddev_ns": 66534072, "avg_ts": 12.475193, "stddev_ts": 0.020207, "samples_ns": [ 41117604997, 40994250481, 41012707688 ], "samples_ts": [ 12.4521, 12.4896, 12.4839 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T07:18:57Z", "avg_ns": 24442348395, "stddev_ns": 1590458200, "avg_ts": 5.521992, "stddev_ts": 1.665273, "samples_ns": [ 27962275121, 28171522695, 17193247371 ], "samples_ts": [ 4.5776, 4.5436, 7.44478 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 422 }, { "timestamp_utc": "2025-12-09T07:25:20.888171+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:20:12Z\",\n \"avg_ns\": 11529210758,\n \"stddev_ns\": 5568548,\n \"avg_ts\": 44.408944,\n \"stddev_ts\": 0.021451,\n \"samples_ns\": [ 11531543427, 11533232506, 11522856342 ],\n \"samples_ts\": [ 44.4, 44.3935, 44.4334 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:20:58Z\",\n \"avg_ns\": 87516916362,\n \"stddev_ns\": 650618825,\n \"avg_ts\": 6.080474,\n \"stddev_ts\": 1.353599,\n \"samples_ns\": [ 73847158363, 113281631123, 75421959601 ],\n \"samples_ts\": [ 6.93324, 4.51971, 6.78847 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T07:20:12Z", "avg_ns": 11529210758, "stddev_ns": 5568548, "avg_ts": 44.408944, "stddev_ts": 0.021451, "samples_ns": [ 11531543427, 11533232506, 11522856342 ], "samples_ts": [ 44.4, 44.3935, 44.4334 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T07:20:58Z", "avg_ns": 87516916362, "stddev_ns": 650618825, "avg_ts": 6.080474, "stddev_ts": 1.353599, "samples_ns": [ 73847158363, 113281631123, 75421959601 ], "samples_ts": [ 6.93324, 4.51971, 6.78847 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 423 }, { "timestamp_utc": "2025-12-09T07:25:55.813401+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:25:21Z\",\n \"avg_ns\": 2872100797,\n \"stddev_ns\": 5148492,\n \"avg_ts\": 44.566777,\n \"stddev_ts\": 0.079962,\n \"samples_ns\": [ 2874718690, 2875413670, 2866170033 ],\n \"samples_ts\": [ 44.5261, 44.5153, 44.6589 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:25:33Z\",\n \"avg_ns\": 7465009040,\n \"stddev_ns\": 25670789,\n \"avg_ts\": 17.146799,\n \"stddev_ts\": 0.058853,\n \"samples_ns\": [ 7453004240, 7447540436, 7494482446 ],\n \"samples_ts\": [ 17.1743, 17.1869, 17.0792 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T07:25:21Z", "avg_ns": 2872100797, "stddev_ns": 5148492, "avg_ts": 44.566777, "stddev_ts": 0.079962, "samples_ns": [ 2874718690, 2875413670, 2866170033 ], "samples_ts": [ 44.5261, 44.5153, 44.6589 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T07:25:33Z", "avg_ns": 7465009040, "stddev_ns": 25670789, "avg_ts": 17.146799, "stddev_ts": 0.058853, "samples_ns": [ 7453004240, 7447540436, 7494482446 ], "samples_ts": [ 17.1743, 17.1869, 17.0792 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 424 }, { "timestamp_utc": "2025-12-09T07:30:30.356553+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:25:56Z\",\n \"avg_ns\": 2968788854,\n \"stddev_ns\": 182195480,\n \"avg_ts\": 43.220038,\n \"stddev_ts\": 2.561822,\n \"samples_ns\": [ 2867103987, 2860131024, 3179131551 ],\n \"samples_ts\": [ 44.6444, 44.7532, 40.2626 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:26:08Z\",\n \"avg_ns\": 87233530937,\n \"stddev_ns\": 4045681622,\n \"avg_ts\": 6.728949,\n \"stddev_ts\": 3.318205,\n \"samples_ns\": [ 100462673056, 112693997233, 48543922524 ],\n \"samples_ts\": [ 5.09642, 4.54328, 10.5471 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T07:25:56Z", "avg_ns": 2968788854, "stddev_ns": 182195480, "avg_ts": 43.220038, "stddev_ts": 2.561822, "samples_ns": [ 2867103987, 2860131024, 3179131551 ], "samples_ts": [ 44.6444, 44.7532, 40.2626 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T07:26:08Z", "avg_ns": 87233530937, "stddev_ns": 4045681622, "avg_ts": 6.728949, "stddev_ts": 3.318205, "samples_ns": [ 100462673056, 112693997233, 48543922524 ], "samples_ts": [ 5.09642, 4.54328, 10.5471 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 425 }, { "timestamp_utc": "2025-12-09T07:32:53.483877+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:30:31Z\",\n \"avg_ns\": 15665930528,\n \"stddev_ns\": 3865289541,\n \"avg_ts\": 36.774562,\n \"stddev_ts\": 13.356566,\n \"samples_ns\": [ 11516504721, 11502001713, 23979285150 ],\n \"samples_ts\": [ 44.4579, 44.514, 21.3518 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:31:29Z\",\n \"avg_ns\": 27759704967,\n \"stddev_ns\": 372362596,\n \"avg_ts\": 4.611551,\n \"stddev_ts\": 0.061571,\n \"samples_ns\": [ 27666457364, 28169829204, 27442828333 ],\n \"samples_ts\": [ 4.62654, 4.54387, 4.66424 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T07:30:31Z", "avg_ns": 15665930528, "stddev_ns": 3865289541, "avg_ts": 36.774562, "stddev_ts": 13.356566, "samples_ns": [ 11516504721, 11502001713, 23979285150 ], "samples_ts": [ 44.4579, 44.514, 21.3518 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T07:31:29Z", "avg_ns": 27759704967, "stddev_ns": 372362596, "avg_ts": 4.611551, "stddev_ts": 0.061571, "samples_ns": [ 27666457364, 28169829204, 27442828333 ], "samples_ts": [ 4.62654, 4.54387, 4.66424 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 426 }, { "timestamp_utc": "2025-12-09T07:38:14.059040+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:32:55Z\",\n \"avg_ns\": 33340479026,\n \"stddev_ns\": 4173451634,\n \"avg_ts\": 17.434528,\n \"stddev_ts\": 8.325906,\n \"samples_ns\": [ 40554380643, 40538064147, 18928992290 ],\n \"samples_ts\": [ 12.625, 12.6301, 27.0485 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:35:15Z\",\n \"avg_ns\": 59356136508,\n \"stddev_ns\": 3201715394,\n \"avg_ts\": 11.849597,\n \"stddev_ts\": 6.520323,\n \"samples_ns\": [ 29643758975, 37475372768, 110949277781 ],\n \"samples_ts\": [ 17.2718, 13.6623, 4.61472 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T07:32:55Z", "avg_ns": 33340479026, "stddev_ns": 4173451634, "avg_ts": 17.434528, "stddev_ts": 8.325906, "samples_ns": [ 40554380643, 40538064147, 18928992290 ], "samples_ts": [ 12.625, 12.6301, 27.0485 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T07:35:15Z", "avg_ns": 59356136508, "stddev_ns": 3201715394, "avg_ts": 11.849597, "stddev_ts": 6.520323, "samples_ns": [ 29643758975, 37475372768, 110949277781 ], "samples_ts": [ 17.2718, 13.6623, 4.61472 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 427 }, { "timestamp_utc": "2025-12-09T07:40:10.272586+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:38:15Z\",\n \"avg_ns\": 10130307359,\n \"stddev_ns\": 23698341,\n \"avg_ts\": 12.635398,\n \"stddev_ts\": 0.029581,\n \"samples_ns\": [ 10104326484, 10135858934, 10150736661 ],\n \"samples_ts\": [ 12.6678, 12.6284, 12.6099 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:38:56Z\",\n \"avg_ns\": 24561605047,\n \"stddev_ns\": 2093170272,\n \"avg_ts\": 5.363725,\n \"stddev_ts\": 1.174763,\n \"samples_ns\": [ 27491420485, 27145183708, 19048210949 ],\n \"samples_ts\": [ 4.656, 4.71539, 6.71979 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T07:38:15Z", "avg_ns": 10130307359, "stddev_ns": 23698341, "avg_ts": 12.635398, "stddev_ts": 0.029581, "samples_ns": [ 10104326484, 10135858934, 10150736661 ], "samples_ts": [ 12.6678, 12.6284, 12.6099 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T07:38:56Z", "avg_ns": 24561605047, "stddev_ns": 2093170272, "avg_ts": 5.363725, "stddev_ts": 1.174763, "samples_ns": [ 27491420485, 27145183708, 19048210949 ], "samples_ts": [ 4.656, 4.71539, 6.71979 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 428 }, { "timestamp_utc": "2025-12-09T07:43:31.512281+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:40:11Z\",\n \"avg_ns\": 2875457047,\n \"stddev_ns\": 6773196,\n \"avg_ts\": 44.514827,\n \"stddev_ts\": 0.104795,\n \"samples_ns\": [ 2882707851, 2869293398, 2874369893 ],\n \"samples_ts\": [ 44.4027, 44.6103, 44.5315 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:40:22Z\",\n \"avg_ns\": 62867909103,\n \"stddev_ns\": 3739027780,\n \"avg_ts\": 10.848834,\n \"stddev_ts\": 6.582534,\n \"samples_ns\": [ 28773166347, 50945603734, 108884957228 ],\n \"samples_ts\": [ 17.7944, 10.0499, 4.70221 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T07:40:11Z", "avg_ns": 2875457047, "stddev_ns": 6773196, "avg_ts": 44.514827, "stddev_ts": 0.104795, "samples_ns": [ 2882707851, 2869293398, 2874369893 ], "samples_ts": [ 44.4027, 44.6103, 44.5315 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T07:40:22Z", "avg_ns": 62867909103, "stddev_ns": 3739027780, "avg_ts": 10.848834, "stddev_ts": 6.582534, "samples_ns": [ 28773166347, 50945603734, 108884957228 ], "samples_ts": [ 17.7944, 10.0499, 4.70221 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 429 }, { "timestamp_utc": "2025-12-09T07:45:56.229309+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:43:33Z\",\n \"avg_ns\": 26843806232,\n \"stddev_ns\": 4270019504,\n \"avg_ts\": 24.778073,\n \"stddev_ts\": 16.430543,\n \"samples_ns\": [ 40840002468, 27908198271, 11783217957 ],\n \"samples_ts\": [ 12.5367, 18.3459, 43.4516 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:45:34Z\",\n \"avg_ns\": 7160232695,\n \"stddev_ns\": 22363831,\n \"avg_ts\": 17.876630,\n \"stddev_ts\": 0.055734,\n \"samples_ns\": [ 7147118711, 7147524399, 7186054976 ],\n \"samples_ts\": [ 17.9093, 17.9083, 17.8123 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T07:43:33Z", "avg_ns": 26843806232, "stddev_ns": 4270019504, "avg_ts": 24.778073, "stddev_ts": 16.430543, "samples_ns": [ 40840002468, 27908198271, 11783217957 ], "samples_ts": [ 12.5367, 18.3459, 43.4516 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T07:45:34Z", "avg_ns": 7160232695, "stddev_ns": 22363831, "avg_ts": 17.87663, "stddev_ts": 0.055734, "samples_ns": [ 7147118711, 7147524399, 7186054976 ], "samples_ts": [ 17.9093, 17.9083, 17.8123 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 430 }, { "timestamp_utc": "2025-12-09T07:51:00.870705+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:45:57Z\",\n \"avg_ns\": 37311979205,\n \"stddev_ns\": 1445339747,\n \"avg_ts\": 13.939086,\n \"stddev_ts\": 2.225835,\n \"samples_ns\": [ 31012929432, 40486040246, 40436967938 ],\n \"samples_ts\": [ 16.5092, 12.6463, 12.6617 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:48:01Z\",\n \"avg_ns\": 59779969672,\n \"stddev_ns\": 4126704634,\n \"avg_ts\": 11.663083,\n \"stddev_ts\": 6.598231,\n \"samples_ns\": [ 109738978999, 40791625077, 28809304941 ],\n \"samples_ts\": [ 4.66562, 12.5516, 17.772 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T07:45:57Z", "avg_ns": 37311979205, "stddev_ns": 1445339747, "avg_ts": 13.939086, "stddev_ts": 2.225835, "samples_ns": [ 31012929432, 40486040246, 40436967938 ], "samples_ts": [ 16.5092, 12.6463, 12.6617 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_type": "gemma3 1B Q2_K - Medium", "model_size": 683281408, "model_n_params": 999885952, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T07:48:01Z", "avg_ns": 59779969672, "stddev_ns": 4126704634, "avg_ts": 11.663083, "stddev_ts": 6.598231, "samples_ns": [ 109738978999, 40791625077, 28809304941 ], "samples_ts": [ 4.66562, 12.5516, 17.772 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-1B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 431 }, { "timestamp_utc": "2025-12-09T07:56:27.677903+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:51:25Z\",\n \"avg_ns\": 33231225053,\n \"stddev_ns\": 35483146,\n \"avg_ts\": 3.851802,\n \"stddev_ts\": 0.004112,\n \"samples_ns\": [ 33269086727, 33198734810, 33225853624 ],\n \"samples_ts\": [ 3.84742, 3.85557, 3.85242 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:53:29Z\",\n \"avg_ns\": 58537644411,\n \"stddev_ns\": 4225363977,\n \"avg_ts\": 2.319446,\n \"stddev_ts\": 0.645973,\n \"samples_ns\": [ 79035893301, 52327665202, 44249374732 ],\n \"samples_ts\": [ 1.61952, 2.44612, 2.8927 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T07:51:25Z", "avg_ns": 33231225053, "stddev_ns": 35483146, "avg_ts": 3.851802, "stddev_ts": 0.004112, "samples_ns": [ 33269086727, 33198734810, 33225853624 ], "samples_ts": [ 3.84742, 3.85557, 3.85242 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T07:53:29Z", "avg_ns": 58537644411, "stddev_ns": 4225363977, "avg_ts": 2.319446, "stddev_ts": 0.645973, "samples_ns": [ 79035893301, 52327665202, 44249374732 ], "samples_ts": [ 1.61952, 2.44612, 2.8927 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 432 }, { "timestamp_utc": "2025-12-09T08:10:26.597412+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:56:31Z\",\n \"avg_ns\": 26175080219,\n \"stddev_ns\": 3963803463,\n \"avg_ts\": 5.175508,\n \"stddev_ts\": 1.560887,\n \"samples_ns\": [ 18523014597, 27053292677, 32948933383 ],\n \"samples_ts\": [ 6.91032, 4.7314, 3.8848 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:58:08Z\",\n \"avg_ns\": 245758125543,\n \"stddev_ns\": 1243743502,\n \"avg_ts\": 2.087778,\n \"stddev_ts\": 0.119696,\n \"samples_ns\": [ 230011033588, 253441514068, 253821828973 ],\n \"samples_ts\": [ 2.22598, 2.02019, 2.01716 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T07:56:31Z", "avg_ns": 26175080219, "stddev_ns": 3963803463, "avg_ts": 5.175508, "stddev_ts": 1.560887, "samples_ns": [ 18523014597, 27053292677, 32948933383 ], "samples_ts": [ 6.91032, 4.7314, 3.8848 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T07:58:08Z", "avg_ns": 245758125543, "stddev_ns": 1243743502, "avg_ts": 2.087778, "stddev_ts": 0.119696, "samples_ns": [ 230011033588, 253441514068, 253821828973 ], "samples_ts": [ 2.22598, 2.02019, 2.01716 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 433 }, { "timestamp_utc": "2025-12-09T08:20:14.657767+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:10:29Z\",\n \"avg_ns\": 100346627523,\n \"stddev_ns\": 4249783711,\n \"avg_ts\": 5.306378,\n \"stddev_ts\": 1.364666,\n \"samples_ns\": [ 111730946325, 114895449903, 74413486343 ],\n \"samples_ts\": [ 4.58244, 4.45623, 6.88047 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:16:45Z\",\n \"avg_ns\": 69600215899,\n \"stddev_ns\": 4212784660,\n \"avg_ts\": 1.886254,\n \"stddev_ts\": 0.383697,\n \"samples_ns\": [ 55068956520, 79502772352, 74228918826 ],\n \"samples_ts\": [ 2.32436, 1.61001, 1.7244 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T08:10:29Z", "avg_ns": 100346627523, "stddev_ns": 4249783711, "avg_ts": 5.306378, "stddev_ts": 1.364666, "samples_ns": [ 111730946325, 114895449903, 74413486343 ], "samples_ts": [ 4.58244, 4.45623, 6.88047 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T08:16:45Z", "avg_ns": 69600215899, "stddev_ns": 4212784660, "avg_ts": 1.886254, "stddev_ts": 0.383697, "samples_ns": [ 55068956520, 79502772352, 74228918826 ], "samples_ts": [ 2.32436, 1.61001, 1.7244 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 434 }, { "timestamp_utc": "2025-12-09T08:38:27.361398+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:20:17Z\",\n \"avg_ns\": 99400753159,\n \"stddev_ns\": 4128133897,\n \"avg_ts\": 5.394805,\n \"stddev_ts\": 1.419341,\n \"samples_ns\": [ 97723424577, 126119734887, 74359100014 ],\n \"samples_ts\": [ 5.23928, 4.05963, 6.88551 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:26:30Z\",\n \"avg_ns\": 238719141079,\n \"stddev_ns\": 3587034107,\n \"avg_ts\": 2.154808,\n \"stddev_ts\": 0.183824,\n \"samples_ns\": [ 253533756937, 246049440238, 216574226063 ],\n \"samples_ts\": [ 2.01945, 2.08088, 2.36409 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T08:20:17Z", "avg_ns": 99400753159, "stddev_ns": 4128133897, "avg_ts": 5.394805, "stddev_ts": 1.419341, "samples_ns": [ 97723424577, 126119734887, 74359100014 ], "samples_ts": [ 5.23928, 4.05963, 6.88551 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T08:26:30Z", "avg_ns": 238719141079, "stddev_ns": 3587034107, "avg_ts": 2.154808, "stddev_ts": 0.183824, "samples_ns": [ 253533756937, 246049440238, 216574226063 ], "samples_ts": [ 2.01945, 2.08088, 2.36409 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 435 }, { "timestamp_utc": "2025-12-09T08:43:06.628728+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:38:32Z\",\n \"avg_ns\": 27551119882,\n \"stddev_ns\": 3884969453,\n \"avg_ts\": 4.959695,\n \"stddev_ts\": 1.661153,\n \"samples_ns\": [ 33262708490, 30756939597, 18633711559 ],\n \"samples_ts\": [ 3.84815, 4.16166, 6.86927 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:40:29Z\",\n \"avg_ns\": 52274384428,\n \"stddev_ns\": 4022060820,\n \"avg_ts\": 2.556845,\n \"stddev_ts\": 0.599107,\n \"samples_ns\": [ 44107785991, 44084769717, 68630597578 ],\n \"samples_ts\": [ 2.90198, 2.9035, 1.86506 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T08:38:32Z", "avg_ns": 27551119882, "stddev_ns": 3884969453, "avg_ts": 4.959695, "stddev_ts": 1.661153, "samples_ns": [ 33262708490, 30756939597, 18633711559 ], "samples_ts": [ 3.84815, 4.16166, 6.86927 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T08:40:29Z", "avg_ns": 52274384428, "stddev_ns": 4022060820, "avg_ts": 2.556845, "stddev_ts": 0.599107, "samples_ns": [ 44107785991, 44084769717, 68630597578 ], "samples_ts": [ 2.90198, 2.9035, 1.86506 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 436 }, { "timestamp_utc": "2025-12-09T08:57:01.375475+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:43:12Z\",\n \"avg_ns\": 30660076647,\n \"stddev_ns\": 3613019620,\n \"avg_ts\": 4.248473,\n \"stddev_ts\": 0.717769,\n \"samples_ns\": [ 33317550893, 33452098652, 25210580396 ],\n \"samples_ts\": [ 3.84182, 3.82637, 5.07723 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:45:17Z\",\n \"avg_ns\": 234335529756,\n \"stddev_ns\": 1726385990,\n \"avg_ts\": 2.185716,\n \"stddev_ts\": 0.051726,\n \"samples_ns\": [ 234556609349, 228692009992, 239757969928 ],\n \"samples_ts\": [ 2.18284, 2.23882, 2.13549 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T08:43:12Z", "avg_ns": 30660076647, "stddev_ns": 3613019620, "avg_ts": 4.248473, "stddev_ts": 0.717769, "samples_ns": [ 33317550893, 33452098652, 25210580396 ], "samples_ts": [ 3.84182, 3.82637, 5.07723 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T08:45:17Z", "avg_ns": 234335529756, "stddev_ns": 1726385990, "avg_ts": 2.185716, "stddev_ts": 0.051726, "samples_ns": [ 234556609349, 228692009992, 239757969928 ], "samples_ts": [ 2.18284, 2.23882, 2.13549 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 437 }, { "timestamp_utc": "2025-12-09T09:06:48.629051+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:57:04Z\",\n \"avg_ns\": 99233535958,\n \"stddev_ns\": 1756507456,\n \"avg_ts\": 5.380567,\n \"stddev_ts\": 1.373894,\n \"samples_ns\": [ 100436853407, 74484997992, 122778756475 ],\n \"samples_ts\": [ 5.09773, 6.87387, 4.1701 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:04:06Z\",\n \"avg_ns\": 53896421188,\n \"stddev_ns\": 887785056,\n \"avg_ts\": 2.492970,\n \"stddev_ts\": 0.615253,\n \"samples_ns\": [ 71806959988, 44868452506, 45013851070 ],\n \"samples_ts\": [ 1.78256, 2.85278, 2.84357 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T08:57:04Z", "avg_ns": 99233535958, "stddev_ns": 1756507456, "avg_ts": 5.380567, "stddev_ts": 1.373894, "samples_ns": [ 100436853407, 74484997992, 122778756475 ], "samples_ts": [ 5.09773, 6.87387, 4.1701 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T09:04:06Z", "avg_ns": 53896421188, "stddev_ns": 887785056, "avg_ts": 2.49297, "stddev_ts": 0.615253, "samples_ns": [ 71806959988, 44868452506, 45013851070 ], "samples_ts": [ 1.78256, 2.85278, 2.84357 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 438 }, { "timestamp_utc": "2025-12-09T09:26:03.594188+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:06:51Z\",\n \"avg_ns\": 99278268550,\n \"stddev_ns\": 3399112710,\n \"avg_ts\": 5.348635,\n \"stddev_ts\": 1.324222,\n \"samples_ns\": [ 110547902109, 74453722317, 112833181225 ],\n \"samples_ts\": [ 4.63148, 6.87675, 4.53767 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:13:43Z\",\n \"avg_ns\": 246362558910,\n \"stddev_ns\": 3772086854,\n \"avg_ts\": 2.085125,\n \"stddev_ts\": 0.149250,\n \"samples_ns\": [ 227121168201, 252342311734, 259624196797 ],\n \"samples_ts\": [ 2.2543, 2.02899, 1.97208 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T09:06:51Z", "avg_ns": 99278268550, "stddev_ns": 3399112710, "avg_ts": 5.348635, "stddev_ts": 1.324222, "samples_ns": [ 110547902109, 74453722317, 112833181225 ], "samples_ts": [ 4.63148, 6.87675, 4.53767 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T09:13:43Z", "avg_ns": 246362558910, "stddev_ns": 3772086854, "avg_ts": 2.085125, "stddev_ts": 0.14925, "samples_ns": [ 227121168201, 252342311734, 259624196797 ], "samples_ts": [ 2.2543, 2.02899, 1.97208 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 439 }, { "timestamp_utc": "2025-12-09T09:30:54.427184+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:26:06Z\",\n \"avg_ns\": 19767831413,\n \"stddev_ns\": 3660627824,\n \"avg_ts\": 6.519004,\n \"stddev_ts\": 0.636029,\n \"samples_ns\": [ 18569897485, 18605963415, 22127633339 ],\n \"samples_ts\": [ 6.89288, 6.87951, 5.78462 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:27:24Z\",\n \"avg_ns\": 69596397591,\n \"stddev_ns\": 3431059687,\n \"avg_ts\": 1.979469,\n \"stddev_ts\": 0.708942,\n \"samples_ns\": [ 81985961804, 81056584649, 45746646320 ],\n \"samples_ts\": [ 1.56124, 1.57914, 2.79802 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T09:26:06Z", "avg_ns": 19767831413, "stddev_ns": 3660627824, "avg_ts": 6.519004, "stddev_ts": 0.636029, "samples_ns": [ 18569897485, 18605963415, 22127633339 ], "samples_ts": [ 6.89288, 6.87951, 5.78462 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T09:27:24Z", "avg_ns": 69596397591, "stddev_ns": 3431059687, "avg_ts": 1.979469, "stddev_ts": 0.708942, "samples_ns": [ 81985961804, 81056584649, 45746646320 ], "samples_ts": [ 1.56124, 1.57914, 2.79802 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 440 }, { "timestamp_utc": "2025-12-09T09:45:14.399236+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:30:57Z\",\n \"avg_ns\": 18561843303,\n \"stddev_ns\": 47173261,\n \"avg_ts\": 6.895897,\n \"stddev_ts\": 0.017500,\n \"samples_ns\": [ 18538854457, 18530571921, 18616103533 ],\n \"samples_ts\": [ 6.90442, 6.9075, 6.87577 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:32:11Z\",\n \"avg_ns\": 260643395267,\n \"stddev_ns\": 1369070592,\n \"avg_ts\": 1.964406,\n \"stddev_ts\": 0.010288,\n \"samples_ns\": [ 259770938534, 259937928200, 262221319068 ],\n \"samples_ts\": [ 1.97097, 1.9697, 1.95255 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T09:30:57Z", "avg_ns": 18561843303, "stddev_ns": 47173261, "avg_ts": 6.895897, "stddev_ts": 0.0175, "samples_ns": [ 18538854457, 18530571921, 18616103533 ], "samples_ts": [ 6.90442, 6.9075, 6.87577 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T09:32:11Z", "avg_ns": 260643395267, "stddev_ns": 1369070592, "avg_ts": 1.964406, "stddev_ts": 0.010288, "samples_ns": [ 259770938534, 259937928200, 262221319068 ], "samples_ts": [ 1.97097, 1.9697, 1.95255 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 441 }, { "timestamp_utc": "2025-12-09T09:55:03.364568+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:45:17Z\",\n \"avg_ns\": 100535311722,\n \"stddev_ns\": 4118428610,\n \"avg_ts\": 5.320161,\n \"stddev_ts\": 1.375271,\n \"samples_ns\": [ 100492763452, 125754324304, 75358847411 ],\n \"samples_ts\": [ 5.09489, 4.07143, 6.79416 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:51:34Z\",\n \"avg_ns\": 69282929863,\n \"stddev_ns\": 4214010172,\n \"avg_ts\": 1.967262,\n \"stddev_ts\": 0.648603,\n \"samples_ns\": [ 47124613571, 80309330282, 80414845737 ],\n \"samples_ts\": [ 2.7162, 1.59384, 1.59175 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T09:45:17Z", "avg_ns": 100535311722, "stddev_ns": 4118428610, "avg_ts": 5.320161, "stddev_ts": 1.375271, "samples_ns": [ 100492763452, 125754324304, 75358847411 ], "samples_ts": [ 5.09489, 4.07143, 6.79416 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T09:51:34Z", "avg_ns": 69282929863, "stddev_ns": 4214010172, "avg_ts": 1.967262, "stddev_ts": 0.648603, "samples_ns": [ 47124613571, 80309330282, 80414845737 ], "samples_ts": [ 2.7162, 1.59384, 1.59175 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 442 }, { "timestamp_utc": "2025-12-09T10:13:36.883048+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:55:08Z\",\n \"avg_ns\": 100551874124,\n \"stddev_ns\": 3988984286,\n \"avg_ts\": 5.380466,\n \"stddev_ts\": 1.481127,\n \"samples_ns\": [ 92985289896, 133317386120, 75352946358 ],\n \"samples_ts\": [ 5.50625, 3.84046, 6.79469 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:01:25Z\",\n \"avg_ns\": 243528990204,\n \"stddev_ns\": 2017275858,\n \"avg_ts\": 2.110244,\n \"stddev_ts\": 0.160407,\n \"samples_ns\": [ 256768273055, 250555047474, 223263650085 ],\n \"samples_ts\": [ 1.99402, 2.04346, 2.29325 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T09:55:08Z", "avg_ns": 100551874124, "stddev_ns": 3988984286, "avg_ts": 5.380466, "stddev_ts": 1.481127, "samples_ns": [ 92985289896, 133317386120, 75352946358 ], "samples_ts": [ 5.50625, 3.84046, 6.79469 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T10:01:25Z", "avg_ns": 243528990204, "stddev_ns": 2017275858, "avg_ts": 2.110244, "stddev_ts": 0.160407, "samples_ns": [ 256768273055, 250555047474, 223263650085 ], "samples_ts": [ 1.99402, 2.04346, 2.29325 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 443 }, { "timestamp_utc": "2025-12-09T10:18:26.852720+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:13:42Z\",\n \"avg_ns\": 26230556441,\n \"stddev_ns\": 4271293362,\n \"avg_ts\": 5.171493,\n \"stddev_ts\": 1.564468,\n \"samples_ns\": [ 33402265682, 26714391619, 18575012022 ],\n \"samples_ts\": [ 3.83208, 4.79142, 6.89098 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:15:34Z\",\n \"avg_ns\": 57139951745,\n \"stddev_ns\": 3298501493,\n \"avg_ts\": 2.391619,\n \"stddev_ts\": 0.674545,\n \"samples_ns\": [ 45992187165, 46058890924, 79368777148 ],\n \"samples_ts\": [ 2.78308, 2.77905, 1.61272 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T10:13:42Z", "avg_ns": 26230556441, "stddev_ns": 4271293362, "avg_ts": 5.171493, "stddev_ts": 1.564468, "samples_ns": [ 33402265682, 26714391619, 18575012022 ], "samples_ts": [ 3.83208, 4.79142, 6.89098 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T10:15:34Z", "avg_ns": 57139951745, "stddev_ns": 3298501493, "avg_ts": 2.391619, "stddev_ts": 0.674545, "samples_ns": [ 45992187165, 46058890924, 79368777148 ], "samples_ts": [ 2.78308, 2.77905, 1.61272 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 444 }, { "timestamp_utc": "2025-12-09T10:32:21.345518+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:18:32Z\",\n \"avg_ns\": 27650251250,\n \"stddev_ns\": 4244281591,\n \"avg_ts\": 4.957646,\n \"stddev_ts\": 1.704023,\n \"samples_ns\": [ 33390673821, 31058291943, 18501787988 ],\n \"samples_ts\": [ 3.83341, 4.12128, 6.91825 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:20:29Z\",\n \"avg_ns\": 237171088657,\n \"stddev_ns\": 3568454228,\n \"avg_ts\": 2.161403,\n \"stddev_ts\": 0.091098,\n \"samples_ns\": [ 249000857036, 231156100549, 231356308387 ],\n \"samples_ts\": [ 2.05622, 2.21495, 2.21304 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T10:18:32Z", "avg_ns": 27650251250, "stddev_ns": 4244281591, "avg_ts": 4.957646, "stddev_ts": 1.704023, "samples_ns": [ 33390673821, 31058291943, 18501787988 ], "samples_ts": [ 3.83341, 4.12128, 6.91825 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T10:20:29Z", "avg_ns": 237171088657, "stddev_ns": 3568454228, "avg_ts": 2.161403, "stddev_ts": 0.091098, "samples_ns": [ 249000857036, 231156100549, 231356308387 ], "samples_ts": [ 2.05622, 2.21495, 2.21304 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 445 }, { "timestamp_utc": "2025-12-09T10:42:15.702753+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:32:27Z\",\n \"avg_ns\": 101980083885,\n \"stddev_ns\": 288975309,\n \"avg_ts\": 5.283309,\n \"stddev_ts\": 1.337800,\n \"samples_ns\": [ 86691760413, 82630277033, 136618214211 ],\n \"samples_ts\": [ 5.90598, 6.19628, 3.74767 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:39:48Z\",\n \"avg_ns\": 48715524111,\n \"stddev_ns\": 3335483261,\n \"avg_ts\": 2.641840,\n \"stddev_ts\": 0.232317,\n \"samples_ns\": [ 53920050259, 45930227664, 46296294412 ],\n \"samples_ts\": [ 2.37389, 2.78684, 2.7648 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T10:32:27Z", "avg_ns": 101980083885, "stddev_ns": 288975309, "avg_ts": 5.283309, "stddev_ts": 1.3378, "samples_ns": [ 86691760413, 82630277033, 136618214211 ], "samples_ts": [ 5.90598, 6.19628, 3.74767 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T10:39:48Z", "avg_ns": 48715524111, "stddev_ns": 3335483261, "avg_ts": 2.64184, "stddev_ts": 0.232317, "samples_ns": [ 53920050259, 45930227664, 46296294412 ], "samples_ts": [ 2.37389, 2.78684, 2.7648 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 446 }, { "timestamp_utc": "2025-12-09T11:01:41.975672+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:42:20Z\",\n \"avg_ns\": 101830406116,\n \"stddev_ns\": 3963502593,\n \"avg_ts\": 5.299406,\n \"stddev_ts\": 1.380421,\n \"samples_ns\": [ 89609258168, 79685731069, 136196229113 ],\n \"samples_ts\": [ 5.7137, 6.42524, 3.75928 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:49:42Z\",\n \"avg_ns\": 239630308475,\n \"stddev_ns\": 4278317820,\n \"avg_ts\": 2.143241,\n \"stddev_ts\": 0.143019,\n \"samples_ns\": [ 229709674586, 230351798657, 258829452183 ],\n \"samples_ts\": [ 2.2289, 2.22269, 1.97814 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T10:42:20Z", "avg_ns": 101830406116, "stddev_ns": 3963502593, "avg_ts": 5.299406, "stddev_ts": 1.380421, "samples_ns": [ 89609258168, 79685731069, 136196229113 ], "samples_ts": [ 5.7137, 6.42524, 3.75928 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T10:49:42Z", "avg_ns": 239630308475, "stddev_ns": 4278317820, "avg_ts": 2.143241, "stddev_ts": 0.143019, "samples_ns": [ 229709674586, 230351798657, 258829452183 ], "samples_ts": [ 2.2289, 2.22269, 1.97814 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 447 }, { "timestamp_utc": "2025-12-09T11:06:33.138503+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:01:45Z\",\n \"avg_ns\": 29820411549,\n \"stddev_ns\": 845953741,\n \"avg_ts\": 4.434220,\n \"stddev_ts\": 1.034700,\n \"samples_ns\": [ 22739606347, 33284826556, 33436801745 ],\n \"samples_ts\": [ 5.62895, 3.8456, 3.82812 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:03:33Z\",\n \"avg_ns\": 59688609991,\n \"stddev_ns\": 1492539524,\n \"avg_ts\": 2.286738,\n \"stddev_ts\": 0.647674,\n \"samples_ns\": [ 82303451396, 50838018730, 45924359848 ],\n \"samples_ts\": [ 1.55522, 2.5178, 2.78719 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T11:01:45Z", "avg_ns": 29820411549, "stddev_ns": 845953741, "avg_ts": 4.43422, "stddev_ts": 1.0347, "samples_ns": [ 22739606347, 33284826556, 33436801745 ], "samples_ts": [ 5.62895, 3.8456, 3.82812 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T11:03:33Z", "avg_ns": 59688609991, "stddev_ns": 1492539524, "avg_ts": 2.286738, "stddev_ts": 0.647674, "samples_ns": [ 82303451396, 50838018730, 45924359848 ], "samples_ts": [ 1.55522, 2.5178, 2.78719 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 448 }, { "timestamp_utc": "2025-12-09T11:20:54.826221+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:06:36Z\",\n \"avg_ns\": 27544660630,\n \"stddev_ns\": 2291756786,\n \"avg_ts\": 4.955513,\n \"stddev_ts\": 1.642299,\n \"samples_ns\": [ 18712754083, 30514774557, 33406453251 ],\n \"samples_ts\": [ 6.84025, 4.19469, 3.8316 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:08:17Z\",\n \"avg_ns\": 252108002803,\n \"stddev_ns\": 713155459,\n \"avg_ts\": 2.036612,\n \"stddev_ts\": 0.134858,\n \"samples_ns\": [ 233560812487, 262255843783, 260507352141 ],\n \"samples_ts\": [ 2.19215, 1.95229, 1.9654 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T11:06:36Z", "avg_ns": 27544660630, "stddev_ns": 2291756786, "avg_ts": 4.955513, "stddev_ts": 1.642299, "samples_ns": [ 18712754083, 30514774557, 33406453251 ], "samples_ts": [ 6.84025, 4.19469, 3.8316 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T11:08:17Z", "avg_ns": 252108002803, "stddev_ns": 713155459, "avg_ts": 2.036612, "stddev_ts": 0.134858, "samples_ns": [ 233560812487, 262255843783, 260507352141 ], "samples_ts": [ 2.19215, 1.95229, 1.9654 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 449 }, { "timestamp_utc": "2025-12-09T11:30:43.565863+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:20:58Z\",\n \"avg_ns\": 100016043749,\n \"stddev_ns\": 3523753318,\n \"avg_ts\": 5.391614,\n \"stddev_ts\": 1.456849,\n \"samples_ns\": [ 130872583283, 94155025862, 75020522103 ],\n \"samples_ts\": [ 3.9122, 5.43784, 6.8248 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:27:13Z\",\n \"avg_ns\": 69950580474,\n \"stddev_ns\": 3863114980,\n \"avg_ts\": 1.902530,\n \"stddev_ts\": 0.486931,\n \"samples_ns\": [ 77276643642, 80614256712, 51960841070 ],\n \"samples_ts\": [ 1.65639, 1.58781, 2.46339 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T11:20:58Z", "avg_ns": 100016043749, "stddev_ns": 3523753318, "avg_ts": 5.391614, "stddev_ts": 1.456849, "samples_ns": [ 130872583283, 94155025862, 75020522103 ], "samples_ts": [ 3.9122, 5.43784, 6.8248 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T11:27:13Z", "avg_ns": 69950580474, "stddev_ns": 3863114980, "avg_ts": 1.90253, "stddev_ts": 0.486931, "samples_ns": [ 77276643642, 80614256712, 51960841070 ], "samples_ts": [ 1.65639, 1.58781, 2.46339 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 450 }, { "timestamp_utc": "2025-12-09T11:49:42.405054+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:30:46Z\",\n \"avg_ns\": 100109602288,\n \"stddev_ns\": 513119417,\n \"avg_ts\": 5.330219,\n \"stddev_ts\": 1.365568,\n \"samples_ns\": [ 122409789617, 103071155996, 74847861252 ],\n \"samples_ts\": [ 4.18267, 4.96744, 6.84054 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:37:01Z\",\n \"avg_ns\": 253211709705,\n \"stddev_ns\": 2395102974,\n \"avg_ts\": 2.022934,\n \"stddev_ts\": 0.052950,\n \"samples_ns\": [ 257055313138, 256906850275, 245672965703 ],\n \"samples_ts\": [ 1.99179, 1.99294, 2.08407 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T11:30:46Z", "avg_ns": 100109602288, "stddev_ns": 513119417, "avg_ts": 5.330219, "stddev_ts": 1.365568, "samples_ns": [ 122409789617, 103071155996, 74847861252 ], "samples_ts": [ 4.18267, 4.96744, 6.84054 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T11:37:01Z", "avg_ns": 253211709705, "stddev_ns": 2395102974, "avg_ts": 2.022934, "stddev_ts": 0.05295, "samples_ns": [ 257055313138, 256906850275, 245672965703 ], "samples_ts": [ 1.99179, 1.99294, 2.08407 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 451 }, { "timestamp_utc": "2025-12-09T11:54:25.486639+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:49:48Z\",\n \"avg_ns\": 18495890427,\n \"stddev_ns\": 19234620,\n \"avg_ts\": 6.920461,\n \"stddev_ts\": 0.007197,\n \"samples_ns\": [ 18476971284, 18515425720, 18495274277 ],\n \"samples_ts\": [ 6.92754, 6.91315, 6.92069 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:51:10Z\",\n \"avg_ns\": 64636021630,\n \"stddev_ns\": 2003146459,\n \"avg_ts\": 2.099723,\n \"stddev_ts\": 0.639106,\n \"samples_ns\": [ 45650106369, 66023101444, 82234857079 ],\n \"samples_ts\": [ 2.80394, 1.93872, 1.55652 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T11:49:48Z", "avg_ns": 18495890427, "stddev_ns": 19234620, "avg_ts": 6.920461, "stddev_ts": 0.007197, "samples_ns": [ 18476971284, 18515425720, 18495274277 ], "samples_ts": [ 6.92754, 6.91315, 6.92069 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T11:51:10Z", "avg_ns": 64636021630, "stddev_ns": 2003146459, "avg_ts": 2.099723, "stddev_ts": 0.639106, "samples_ns": [ 45650106369, 66023101444, 82234857079 ], "samples_ts": [ 2.80394, 1.93872, 1.55652 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 452 }, { "timestamp_utc": "2025-12-09T12:08:15.934199+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:54:31Z\",\n \"avg_ns\": 19195734565,\n \"stddev_ns\": 690512380,\n \"avg_ts\": 6.673788,\n \"stddev_ts\": 0.235187,\n \"samples_ns\": [ 19993057553, 18800863602, 18793282541 ],\n \"samples_ts\": [ 6.40222, 6.8082, 6.81094 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:56:02Z\",\n \"avg_ns\": 244137035229,\n \"stddev_ns\": 854028484,\n \"avg_ts\": 2.103250,\n \"stddev_ts\": 0.138152,\n \"samples_ns\": [ 260674305374, 243208209677, 228528590636 ],\n \"samples_ts\": [ 1.96414, 2.10519, 2.24042 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T11:54:31Z", "avg_ns": 19195734565, "stddev_ns": 690512380, "avg_ts": 6.673788, "stddev_ts": 0.235187, "samples_ns": [ 19993057553, 18800863602, 18793282541 ], "samples_ts": [ 6.40222, 6.8082, 6.81094 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T11:56:02Z", "avg_ns": 244137035229, "stddev_ns": 854028484, "avg_ts": 2.10325, "stddev_ts": 0.138152, "samples_ns": [ 260674305374, 243208209677, 228528590636 ], "samples_ts": [ 1.96414, 2.10519, 2.24042 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 453 }, { "timestamp_utc": "2025-12-09T12:18:12.117005+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:08:21Z\",\n \"avg_ns\": 101659388909,\n \"stddev_ns\": 3743902509,\n \"avg_ts\": 5.225901,\n \"stddev_ts\": 1.291809,\n \"samples_ns\": [ 76414456840, 109294312705, 119269397182 ],\n \"samples_ts\": [ 6.7003, 4.6846, 4.2928 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:15:30Z\",\n \"avg_ns\": 53694828487,\n \"stddev_ns\": 555271962,\n \"avg_ts\": 2.480625,\n \"stddev_ts\": 0.559658,\n \"samples_ns\": [ 45688508793, 45618117748, 69777858921 ],\n \"samples_ts\": [ 2.80158, 2.8059, 1.83439 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T12:08:21Z", "avg_ns": 101659388909, "stddev_ns": 3743902509, "avg_ts": 5.225901, "stddev_ts": 1.291809, "samples_ns": [ 76414456840, 109294312705, 119269397182 ], "samples_ts": [ 6.7003, 4.6846, 4.2928 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T12:15:30Z", "avg_ns": 53694828487, "stddev_ns": 555271962, "avg_ts": 2.480625, "stddev_ts": 0.559658, "samples_ns": [ 45688508793, 45618117748, 69777858921 ], "samples_ts": [ 2.80158, 2.8059, 1.83439 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 454 }, { "timestamp_utc": "2025-12-09T12:37:11.677081+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:18:17Z\",\n \"avg_ns\": 100449908193,\n \"stddev_ns\": 480466923,\n \"avg_ts\": 5.312427,\n \"stddev_ts\": 1.357104,\n \"samples_ns\": [ 75236818022, 102832542594, 123280363964 ],\n \"samples_ts\": [ 6.80518, 4.97897, 4.15314 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:25:23Z\",\n \"avg_ns\": 235981842065,\n \"stddev_ns\": 3800454311,\n \"avg_ts\": 2.170746,\n \"stddev_ts\": 0.059347,\n \"samples_ns\": [ 242913108720, 230050093215, 234982324261 ],\n \"samples_ts\": [ 2.10775, 2.2256, 2.17889 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T12:18:17Z", "avg_ns": 100449908193, "stddev_ns": 480466923, "avg_ts": 5.312427, "stddev_ts": 1.357104, "samples_ns": [ 75236818022, 102832542594, 123280363964 ], "samples_ts": [ 6.80518, 4.97897, 4.15314 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T12:25:23Z", "avg_ns": 235981842065, "stddev_ns": 3800454311, "avg_ts": 2.170746, "stddev_ts": 0.059347, "samples_ns": [ 242913108720, 230050093215, 234982324261 ], "samples_ts": [ 2.10775, 2.2256, 2.17889 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 455 }, { "timestamp_utc": "2025-12-09T12:41:59.637720+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:37:14Z\",\n \"avg_ns\": 33448465933,\n \"stddev_ns\": 25766526,\n \"avg_ts\": 3.826784,\n \"stddev_ts\": 0.002949,\n \"samples_ns\": [ 33460820258, 33465728234, 33418849308 ],\n \"samples_ts\": [ 3.82537, 3.82481, 3.83017 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:39:26Z\",\n \"avg_ns\": 50673016741,\n \"stddev_ns\": 3759823784,\n \"avg_ts\": 2.590006,\n \"stddev_ts\": 0.471722,\n \"samples_ns\": [ 62582033783, 44680036398, 44756980042 ],\n \"samples_ts\": [ 2.04532, 2.86481, 2.85989 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T12:37:14Z", "avg_ns": 33448465933, "stddev_ns": 25766526, "avg_ts": 3.826784, "stddev_ts": 0.002949, "samples_ns": [ 33460820258, 33465728234, 33418849308 ], "samples_ts": [ 3.82537, 3.82481, 3.83017 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T12:39:26Z", "avg_ns": 50673016741, "stddev_ns": 3759823784, "avg_ts": 2.590006, "stddev_ts": 0.471722, "samples_ns": [ 62582033783, 44680036398, 44756980042 ], "samples_ts": [ 2.04532, 2.86481, 2.85989 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 456 }, { "timestamp_utc": "2025-12-09T12:56:08.655598+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:42:02Z\",\n \"avg_ns\": 33124215049,\n \"stddev_ns\": 55205376,\n \"avg_ts\": 3.864250,\n \"stddev_ts\": 0.006446,\n \"samples_ns\": [ 33060650362, 33160150509, 33151844277 ],\n \"samples_ts\": [ 3.87167, 3.86005, 3.86102 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:44:04Z\",\n \"avg_ns\": 241217153607,\n \"stddev_ns\": 3776351345,\n \"avg_ts\": 2.130897,\n \"stddev_ts\": 0.164267,\n \"samples_ns\": [ 222103963366, 242830618402, 258716879055 ],\n \"samples_ts\": [ 2.30523, 2.10847, 1.979 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T12:42:02Z", "avg_ns": 33124215049, "stddev_ns": 55205376, "avg_ts": 3.86425, "stddev_ts": 0.006446, "samples_ns": [ 33060650362, 33160150509, 33151844277 ], "samples_ts": [ 3.87167, 3.86005, 3.86102 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T12:44:04Z", "avg_ns": 241217153607, "stddev_ns": 3776351345, "avg_ts": 2.130897, "stddev_ts": 0.164267, "samples_ns": [ 222103963366, 242830618402, 258716879055 ], "samples_ts": [ 2.30523, 2.10847, 1.979 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 457 }, { "timestamp_utc": "2025-12-09T13:06:04.666913+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:56:11Z\",\n \"avg_ns\": 101686318130,\n \"stddev_ns\": 3945609575,\n \"avg_ts\": 5.293494,\n \"stddev_ts\": 1.327393,\n \"samples_ns\": [ 135941724397, 82980590179, 86136639816 ],\n \"samples_ts\": [ 3.76632, 6.17012, 5.94404 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:02:40Z\",\n \"avg_ns\": 67666997682,\n \"stddev_ns\": 1577586773,\n \"avg_ts\": 2.014941,\n \"stddev_ts\": 0.660159,\n \"samples_ns\": [ 82448709783, 74358969117, 46193314146 ],\n \"samples_ts\": [ 1.55248, 1.72138, 2.77096 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T12:56:11Z", "avg_ns": 101686318130, "stddev_ns": 3945609575, "avg_ts": 5.293494, "stddev_ts": 1.327393, "samples_ns": [ 135941724397, 82980590179, 86136639816 ], "samples_ts": [ 3.76632, 6.17012, 5.94404 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T13:02:40Z", "avg_ns": 67666997682, "stddev_ns": 1577586773, "avg_ts": 2.014941, "stddev_ts": 0.660159, "samples_ns": [ 82448709783, 74358969117, 46193314146 ], "samples_ts": [ 1.55248, 1.72138, 2.77096 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 458 }, { "timestamp_utc": "2025-12-09T13:25:25.791758+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:06:07Z\",\n \"avg_ns\": 100267480069,\n \"stddev_ns\": 1807457923,\n \"avg_ts\": 5.380212,\n \"stddev_ts\": 1.384971,\n \"samples_ns\": [ 134466420218, 86564329932, 79771690058 ],\n \"samples_ts\": [ 3.80764, 5.91468, 6.41832 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:12:28Z\",\n \"avg_ns\": 258885550955,\n \"stddev_ns\": 822558586,\n \"avg_ts\": 1.977904,\n \"stddev_ts\": 0.024206,\n \"samples_ns\": [ 255258868925, 260510855390, 260886928550 ],\n \"samples_ts\": [ 2.00581, 1.96537, 1.96254 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T13:06:07Z", "avg_ns": 100267480069, "stddev_ns": 1807457923, "avg_ts": 5.380212, "stddev_ts": 1.384971, "samples_ns": [ 134466420218, 86564329932, 79771690058 ], "samples_ts": [ 3.80764, 5.91468, 6.41832 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T13:12:28Z", "avg_ns": 258885550955, "stddev_ns": 822558586, "avg_ts": 1.977904, "stddev_ts": 0.024206, "samples_ns": [ 255258868925, 260510855390, 260886928550 ], "samples_ts": [ 2.00581, 1.96537, 1.96254 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 459 }, { "timestamp_utc": "2025-12-09T13:30:14.082092+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:25:28Z\",\n \"avg_ns\": 18574672421,\n \"stddev_ns\": 6714724,\n \"avg_ts\": 6.891105,\n \"stddev_ts\": 0.002491,\n \"samples_ns\": [ 18572365984, 18569414884, 18582236395 ],\n \"samples_ts\": [ 6.89196, 6.89306, 6.8883 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:26:43Z\",\n \"avg_ns\": 70050316225,\n \"stddev_ns\": 746453708,\n \"avg_ts\": 1.885572,\n \"stddev_ts\": 0.430114,\n \"samples_ns\": [ 53805492790, 80528712158, 75816743727 ],\n \"samples_ts\": [ 2.37894, 1.5895, 1.68828 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T13:25:28Z", "avg_ns": 18574672421, "stddev_ns": 6714724, "avg_ts": 6.891105, "stddev_ts": 0.002491, "samples_ns": [ 18572365984, 18569414884, 18582236395 ], "samples_ts": [ 6.89196, 6.89306, 6.8883 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T13:26:43Z", "avg_ns": 70050316225, "stddev_ns": 746453708, "avg_ts": 1.885572, "stddev_ts": 0.430114, "samples_ns": [ 53805492790, 80528712158, 75816743727 ], "samples_ts": [ 2.37894, 1.5895, 1.68828 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 460 }, { "timestamp_utc": "2025-12-09T13:43:44.561969+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:30:17Z\",\n \"avg_ns\": 18533730280,\n \"stddev_ns\": 22880152,\n \"avg_ts\": 6.906334,\n \"stddev_ts\": 0.008523,\n \"samples_ns\": [ 18558311416, 18513054687, 18529824738 ],\n \"samples_ts\": [ 6.89718, 6.91404, 6.90778 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:31:31Z\",\n \"avg_ns\": 244105890686,\n \"stddev_ns\": 551403125,\n \"avg_ts\": 2.106791,\n \"stddev_ts\": 0.175820,\n \"samples_ns\": [ 256738227431, 253876373611, 221703071017 ],\n \"samples_ts\": [ 1.99425, 2.01673, 2.3094 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T13:30:17Z", "avg_ns": 18533730280, "stddev_ns": 22880152, "avg_ts": 6.906334, "stddev_ts": 0.008523, "samples_ns": [ 18558311416, 18513054687, 18529824738 ], "samples_ts": [ 6.89718, 6.91404, 6.90778 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T13:31:31Z", "avg_ns": 244105890686, "stddev_ns": 551403125, "avg_ts": 2.106791, "stddev_ts": 0.17582, "samples_ns": [ 256738227431, 253876373611, 221703071017 ], "samples_ts": [ 1.99425, 2.01673, 2.3094 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 461 }, { "timestamp_utc": "2025-12-09T13:53:24.289447+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:43:50Z\",\n \"avg_ns\": 99818225981,\n \"stddev_ns\": 4000251715,\n \"avg_ts\": 5.326091,\n \"stddev_ts\": 1.337275,\n \"samples_ns\": [ 74595814963, 115473519941, 109385343039 ],\n \"samples_ts\": [ 6.86366, 4.43392, 4.6807 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:50:38Z\",\n \"avg_ns\": 54851129087,\n \"stddev_ns\": 4026724410,\n \"avg_ts\": 2.469401,\n \"stddev_ts\": 0.652893,\n \"samples_ns\": [ 44954774269, 44985046761, 74613566232 ],\n \"samples_ts\": [ 2.84731, 2.84539, 1.71551 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T13:43:50Z", "avg_ns": 99818225981, "stddev_ns": 4000251715, "avg_ts": 5.326091, "stddev_ts": 1.337275, "samples_ns": [ 74595814963, 115473519941, 109385343039 ], "samples_ts": [ 6.86366, 4.43392, 4.6807 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T13:50:38Z", "avg_ns": 54851129087, "stddev_ns": 4026724410, "avg_ts": 2.469401, "stddev_ts": 0.652893, "samples_ns": [ 44954774269, 44985046761, 74613566232 ], "samples_ts": [ 2.84731, 2.84539, 1.71551 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 462 }, { "timestamp_utc": "2025-12-09T14:12:03.959107+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:53:30Z\",\n \"avg_ns\": 99841460834,\n \"stddev_ns\": 1192512846,\n \"avg_ts\": 5.329801,\n \"stddev_ts\": 1.343744,\n \"samples_ns\": [ 74659778169, 106677331072, 118187273263 ],\n \"samples_ts\": [ 6.85778, 4.79952, 4.33211 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:00:27Z\",\n \"avg_ns\": 231829828595,\n \"stddev_ns\": 3782929219,\n \"avg_ts\": 2.210445,\n \"stddev_ts\": 0.080364,\n \"samples_ns\": [ 239331930969, 222840492363, 233317062455 ],\n \"samples_ts\": [ 2.13929, 2.29761, 2.19444 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T13:53:30Z", "avg_ns": 99841460834, "stddev_ns": 1192512846, "avg_ts": 5.329801, "stddev_ts": 1.343744, "samples_ns": [ 74659778169, 106677331072, 118187273263 ], "samples_ts": [ 6.85778, 4.79952, 4.33211 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T14:00:27Z", "avg_ns": 231829828595, "stddev_ns": 3782929219, "avg_ts": 2.210445, "stddev_ts": 0.080364, "samples_ns": [ 239331930969, 222840492363, 233317062455 ], "samples_ts": [ 2.13929, 2.29761, 2.19444 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 463 }, { "timestamp_utc": "2025-12-09T14:16:51.515703+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:12:07Z\",\n \"avg_ns\": 33330033917,\n \"stddev_ns\": 7135070,\n \"avg_ts\": 3.840380,\n \"stddev_ts\": 0.000822,\n \"samples_ns\": [ 33322162685, 33336069218, 33331869849 ],\n \"samples_ts\": [ 3.84129, 3.83968, 3.84017 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:14:12Z\",\n \"avg_ns\": 52756311089,\n \"stddev_ns\": 3764458763,\n \"avg_ts\": 2.530567,\n \"stddev_ts\": 0.585718,\n \"samples_ns\": [ 69030378492, 44687380958, 44551173818 ],\n \"samples_ts\": [ 1.85426, 2.86434, 2.8731 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T14:12:07Z", "avg_ns": 33330033917, "stddev_ns": 7135070, "avg_ts": 3.84038, "stddev_ts": 0.000822, "samples_ns": [ 33322162685, 33336069218, 33331869849 ], "samples_ts": [ 3.84129, 3.83968, 3.84017 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T14:14:12Z", "avg_ns": 52756311089, "stddev_ns": 3764458763, "avg_ts": 2.530567, "stddev_ts": 0.585718, "samples_ns": [ 69030378492, 44687380958, 44551173818 ], "samples_ts": [ 1.85426, 2.86434, 2.8731 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 464 }, { "timestamp_utc": "2025-12-09T14:31:00.907309+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:16:54Z\",\n \"avg_ns\": 32275130436,\n \"stddev_ns\": 1876955302,\n \"avg_ts\": 3.975176,\n \"stddev_ts\": 0.239203,\n \"samples_ns\": [ 30107943389, 33379613394, 33337834527 ],\n \"samples_ts\": [ 4.25137, 3.83468, 3.83948 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:18:50Z\",\n \"avg_ns\": 243232927730,\n \"stddev_ns\": 3760611063,\n \"avg_ts\": 2.111182,\n \"stddev_ts\": 0.142097,\n \"samples_ns\": [ 225534719306, 247719700890, 256444362995 ],\n \"samples_ts\": [ 2.27016, 2.06685, 1.99653 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T14:16:54Z", "avg_ns": 32275130436, "stddev_ns": 1876955302, "avg_ts": 3.975176, "stddev_ts": 0.239203, "samples_ns": [ 30107943389, 33379613394, 33337834527 ], "samples_ts": [ 4.25137, 3.83468, 3.83948 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T14:18:50Z", "avg_ns": 243232927730, "stddev_ns": 3760611063, "avg_ts": 2.111182, "stddev_ts": 0.142097, "samples_ns": [ 225534719306, 247719700890, 256444362995 ], "samples_ts": [ 2.27016, 2.06685, 1.99653 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 465 }, { "timestamp_utc": "2025-12-09T14:40:51.596526+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:31:04Z\",\n \"avg_ns\": 101348968951,\n \"stddev_ns\": 4013048396,\n \"avg_ts\": 5.327383,\n \"stddev_ts\": 1.394469,\n \"samples_ns\": [ 135714929835, 89166707897, 79165269121 ],\n \"samples_ts\": [ 3.77261, 5.74205, 6.46748 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:37:25Z\",\n \"avg_ns\": 68362780363,\n \"stddev_ns\": 681306632,\n \"avg_ts\": 2.009195,\n \"stddev_ts\": 0.704145,\n \"samples_ns\": [ 79844922876, 79889872842, 45353545371 ],\n \"samples_ts\": [ 1.60311, 1.60221, 2.82227 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T14:31:04Z", "avg_ns": 101348968951, "stddev_ns": 4013048396, "avg_ts": 5.327383, "stddev_ts": 1.394469, "samples_ns": [ 135714929835, 89166707897, 79165269121 ], "samples_ts": [ 3.77261, 5.74205, 6.46748 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T14:37:25Z", "avg_ns": 68362780363, "stddev_ns": 681306632, "avg_ts": 2.009195, "stddev_ts": 0.704145, "samples_ns": [ 79844922876, 79889872842, 45353545371 ], "samples_ts": [ 1.60311, 1.60221, 2.82227 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 466 }, { "timestamp_utc": "2025-12-09T14:59:49.818810+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:40:54Z\",\n \"avg_ns\": 100551648840,\n \"stddev_ns\": 1945995649,\n \"avg_ts\": 5.341564,\n \"stddev_ts\": 1.407669,\n \"samples_ns\": [ 129315245699, 96749307475, 75590393346 ],\n \"samples_ts\": [ 3.95932, 5.29203, 6.77335 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:47:12Z\",\n \"avg_ns\": 252274565862,\n \"stddev_ns\": 1894849411,\n \"avg_ts\": 2.030008,\n \"stddev_ts\": 0.038176,\n \"samples_ns\": [ 254653190051, 255303524544, 246866982992 ],\n \"samples_ts\": [ 2.01058, 2.00546, 2.07399 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T14:40:54Z", "avg_ns": 100551648840, "stddev_ns": 1945995649, "avg_ts": 5.341564, "stddev_ts": 1.407669, "samples_ns": [ 129315245699, 96749307475, 75590393346 ], "samples_ts": [ 3.95932, 5.29203, 6.77335 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T14:47:12Z", "avg_ns": 252274565862, "stddev_ns": 1894849411, "avg_ts": 2.030008, "stddev_ts": 0.038176, "samples_ns": [ 254653190051, 255303524544, 246866982992 ], "samples_ts": [ 2.01058, 2.00546, 2.07399 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 467 }, { "timestamp_utc": "2025-12-09T15:04:02.735306+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:59:56Z\",\n \"avg_ns\": 9410550768,\n \"stddev_ns\": 11853426,\n \"avg_ts\": 13.601769,\n \"stddev_ts\": 0.017142,\n \"samples_ns\": [ 9415267314, 9419319065, 9397065927 ],\n \"samples_ts\": [ 13.5949, 13.5891, 13.6213 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:00:42Z\",\n \"avg_ns\": 66511966679,\n \"stddev_ns\": 2097037513,\n \"avg_ts\": 2.705606,\n \"stddev_ts\": 2.131446,\n \"samples_ns\": [ 24797576523, 79344894300, 95393429214 ],\n \"samples_ts\": [ 5.16179, 1.61321, 1.34181 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T14:59:56Z", "avg_ns": 9410550768, "stddev_ns": 11853426, "avg_ts": 13.601769, "stddev_ts": 0.017142, "samples_ns": [ 9415267314, 9419319065, 9397065927 ], "samples_ts": [ 13.5949, 13.5891, 13.6213 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T15:00:42Z", "avg_ns": 66511966679, "stddev_ns": 2097037513, "avg_ts": 2.705606, "stddev_ts": 2.131446, "samples_ns": [ 24797576523, 79344894300, 95393429214 ], "samples_ts": [ 5.16179, 1.61321, 1.34181 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 468 }, { "timestamp_utc": "2025-12-09T15:16:16.283411+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:04:08Z\",\n \"avg_ns\": 15738202616,\n \"stddev_ns\": 1163526527,\n \"avg_ts\": 10.596323,\n \"stddev_ts\": 5.280802,\n \"samples_ns\": [ 28453422986, 9383536376, 9377648486 ],\n \"samples_ts\": [ 4.49858, 13.6409, 13.6495 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:05:28Z\",\n \"avg_ns\": 215698381282,\n \"stddev_ns\": 2041748721,\n \"avg_ts\": 2.409182,\n \"stddev_ts\": 0.343158,\n \"samples_ns\": [ 254337556463, 195702509507, 197055077876 ],\n \"samples_ts\": [ 2.01307, 2.61622, 2.59826 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T15:04:08Z", "avg_ns": 15738202616, "stddev_ns": 1163526527, "avg_ts": 10.596323, "stddev_ts": 5.280802, "samples_ns": [ 28453422986, 9383536376, 9377648486 ], "samples_ts": [ 4.49858, 13.6409, 13.6495 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T15:05:28Z", "avg_ns": 215698381282, "stddev_ns": 2041748721, "avg_ts": 2.409182, "stddev_ts": 0.343158, "samples_ns": [ 254337556463, 195702509507, 197055077876 ], "samples_ts": [ 2.01307, 2.61622, 2.59826 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 469 }, { "timestamp_utc": "2025-12-09T15:25:42.476178+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:16:20Z\",\n \"avg_ns\": 78401584239,\n \"stddev_ns\": 3907260955,\n \"avg_ts\": 7.978288,\n \"stddev_ts\": 4.815141,\n \"samples_ns\": [ 103847771543, 37843430380, 93513550796 ],\n \"samples_ts\": [ 4.93029, 13.5294, 5.47514 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:22:31Z\",\n \"avg_ns\": 63476168807,\n \"stddev_ns\": 889539140,\n \"avg_ts\": 2.775195,\n \"stddev_ts\": 2.071506,\n \"samples_ns\": [ 96311784290, 69254160307, 24862561824 ],\n \"samples_ts\": [ 1.32902, 1.84826, 5.1483 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T15:16:20Z", "avg_ns": 78401584239, "stddev_ns": 3907260955, "avg_ts": 7.978288, "stddev_ts": 4.815141, "samples_ns": [ 103847771543, 37843430380, 93513550796 ], "samples_ts": [ 4.93029, 13.5294, 5.47514 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T15:22:31Z", "avg_ns": 63476168807, "stddev_ns": 889539140, "avg_ts": 2.775195, "stddev_ts": 2.071506, "samples_ns": [ 96311784290, 69254160307, 24862561824 ], "samples_ts": [ 1.32902, 1.84826, 5.1483 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 470 }, { "timestamp_utc": "2025-12-09T15:43:15.300153+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:25:45Z\",\n \"avg_ns\": 88281320443,\n \"stddev_ns\": 2051304722,\n \"avg_ts\": 7.621436,\n \"stddev_ts\": 5.219665,\n \"samples_ns\": [ 132885578127, 94253725434, 37704657770 ],\n \"samples_ts\": [ 3.85294, 5.43215, 13.5792 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:31:01Z\",\n \"avg_ns\": 244468508551,\n \"stddev_ns\": 3909511834,\n \"avg_ts\": 2.146726,\n \"stddev_ts\": 0.433812,\n \"samples_ns\": [ 272456371636, 267541077012, 193408077005 ],\n \"samples_ts\": [ 1.8792, 1.91372, 2.64725 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T15:25:45Z", "avg_ns": 88281320443, "stddev_ns": 2051304722, "avg_ts": 7.621436, "stddev_ts": 5.219665, "samples_ns": [ 132885578127, 94253725434, 37704657770 ], "samples_ts": [ 3.85294, 5.43215, 13.5792 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T15:31:01Z", "avg_ns": 244468508551, "stddev_ns": 3909511834, "avg_ts": 2.146726, "stddev_ts": 0.433812, "samples_ns": [ 272456371636, 267541077012, 193408077005 ], "samples_ts": [ 1.8792, 1.91372, 2.64725 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 471 }, { "timestamp_utc": "2025-12-09T15:47:16.237075+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:43:20Z\",\n \"avg_ns\": 27041178365,\n \"stddev_ns\": 3579536559,\n \"avg_ts\": 5.394948,\n \"stddev_ts\": 2.617623,\n \"samples_ns\": [ 32958765960, 32958387053, 15206382083 ],\n \"samples_ts\": [ 3.88364, 3.88369, 8.41752 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:45:15Z\",\n \"avg_ns\": 40122657447,\n \"stddev_ns\": 3716218494,\n \"avg_ts\": 4.057615,\n \"stddev_ts\": 1.952594,\n \"samples_ns\": [ 24655604235, 24718244358, 70994123749 ],\n \"samples_ts\": [ 5.19152, 5.17836, 1.80297 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T15:43:20Z", "avg_ns": 27041178365, "stddev_ns": 3579536559, "avg_ts": 5.394948, "stddev_ts": 2.617623, "samples_ns": [ 32958765960, 32958387053, 15206382083 ], "samples_ts": [ 3.88364, 3.88369, 8.41752 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T15:45:15Z", "avg_ns": 40122657447, "stddev_ns": 3716218494, "avg_ts": 4.057615, "stddev_ts": 1.952594, "samples_ns": [ 24655604235, 24718244358, 70994123749 ], "samples_ts": [ 5.19152, 5.17836, 1.80297 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 472 }, { "timestamp_utc": "2025-12-09T16:00:43.302757+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:47:22Z\",\n \"avg_ns\": 33142510702,\n \"stddev_ns\": 21078692,\n \"avg_ts\": 3.862110,\n \"stddev_ts\": 0.002457,\n \"samples_ns\": [ 33120285171, 33145030994, 33162215941 ],\n \"samples_ts\": [ 3.8647, 3.86182, 3.85981 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:49:34Z\",\n \"avg_ns\": 222536373185,\n \"stddev_ns\": 3910148045,\n \"avg_ts\": 2.352202,\n \"stddev_ts\": 0.405574,\n \"samples_ns\": [ 194330490985, 202016760692, 271261867880 ],\n \"samples_ts\": [ 2.63469, 2.53444, 1.88748 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T15:47:22Z", "avg_ns": 33142510702, "stddev_ns": 21078692, "avg_ts": 3.86211, "stddev_ts": 0.002457, "samples_ns": [ 33120285171, 33145030994, 33162215941 ], "samples_ts": [ 3.8647, 3.86182, 3.85981 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T15:49:34Z", "avg_ns": 222536373185, "stddev_ns": 3910148045, "avg_ts": 2.352202, "stddev_ts": 0.405574, "samples_ns": [ 194330490985, 202016760692, 271261867880 ], "samples_ts": [ 2.63469, 2.53444, 1.88748 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 473 }, { "timestamp_utc": "2025-12-09T16:10:08.629948+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:00:46Z\",\n \"avg_ns\": 87460604016,\n \"stddev_ns\": 1658179427,\n \"avg_ts\": 7.649309,\n \"stddev_ts\": 5.167497,\n \"samples_ns\": [ 131936128369, 92631807380, 37813876301 ],\n \"samples_ts\": [ 3.88067, 5.52726, 13.54 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:06:01Z\",\n \"avg_ns\": 82158828453,\n \"stddev_ns\": 4098459199,\n \"avg_ts\": 1.620374,\n \"stddev_ts\": 0.417413,\n \"samples_ns\": [ 60885753196, 93250987563, 92339744600 ],\n \"samples_ts\": [ 2.1023, 1.37264, 1.38619 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T16:00:46Z", "avg_ns": 87460604016, "stddev_ns": 1658179427, "avg_ts": 7.649309, "stddev_ts": 5.167497, "samples_ns": [ 131936128369, 92631807380, 37813876301 ], "samples_ts": [ 3.88067, 5.52726, 13.54 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T16:06:01Z", "avg_ns": 82158828453, "stddev_ns": 4098459199, "avg_ts": 1.620374, "stddev_ts": 0.417413, "samples_ns": [ 60885753196, 93250987563, 92339744600 ], "samples_ts": [ 2.1023, 1.37264, 1.38619 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 474 }, { "timestamp_utc": "2025-12-09T16:26:12.402739+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:10:11Z\",\n \"avg_ns\": 92335099288,\n \"stddev_ns\": 4055883808,\n \"avg_ts\": 6.143659,\n \"stddev_ts\": 2.342153,\n \"samples_ns\": [ 59747097245, 131442741902, 85815458718 ],\n \"samples_ts\": [ 8.56945, 3.89523, 5.96629 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:15:26Z\",\n \"avg_ns\": 215133750552,\n \"stddev_ns\": 3903209960,\n \"avg_ts\": 2.407494,\n \"stddev_ts\": 0.304535,\n \"samples_ns\": [ 248727548477, 195481250273, 201192452907 ],\n \"samples_ts\": [ 2.05848, 2.61918, 2.54483 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T16:10:11Z", "avg_ns": 92335099288, "stddev_ns": 4055883808, "avg_ts": 6.143659, "stddev_ts": 2.342153, "samples_ns": [ 59747097245, 131442741902, 85815458718 ], "samples_ts": [ 8.56945, 3.89523, 5.96629 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T16:15:26Z", "avg_ns": 215133750552, "stddev_ns": 3903209960, "avg_ts": 2.407494, "stddev_ts": 0.304535, "samples_ns": [ 248727548477, 195481250273, 201192452907 ], "samples_ts": [ 2.05848, 2.61918, 2.54483 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 475 }, { "timestamp_utc": "2025-12-09T16:30:56.085637+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:26:15Z\",\n \"avg_ns\": 33138966800,\n \"stddev_ns\": 203350233,\n \"avg_ts\": 3.862619,\n \"stddev_ts\": 0.023623,\n \"samples_ns\": [ 33000917528, 33372484950, 33043497922 ],\n \"samples_ts\": [ 3.87868, 3.8355, 3.87368 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:28:25Z\",\n \"avg_ns\": 49734191852,\n \"stddev_ns\": 4155655612,\n \"avg_ts\": 3.607303,\n \"stddev_ts\": 2.001705,\n \"samples_ns\": [ 94668227980, 29792725904, 24741621673 ],\n \"samples_ts\": [ 1.35209, 4.29635, 5.17347 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T16:26:15Z", "avg_ns": 33138966800, "stddev_ns": 203350233, "avg_ts": 3.862619, "stddev_ts": 0.023623, "samples_ns": [ 33000917528, 33372484950, 33043497922 ], "samples_ts": [ 3.87868, 3.8355, 3.87368 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T16:28:25Z", "avg_ns": 49734191852, "stddev_ns": 4155655612, "avg_ts": 3.607303, "stddev_ts": 2.001705, "samples_ns": [ 94668227980, 29792725904, 24741621673 ], "samples_ts": [ 1.35209, 4.29635, 5.17347 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 476 }, { "timestamp_utc": "2025-12-09T16:45:10.074429+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:30:59Z\",\n \"avg_ns\": 25267493686,\n \"stddev_ns\": 899407290,\n \"avg_ts\": 6.307915,\n \"stddev_ts\": 3.999352,\n \"samples_ns\": [ 11718791281, 30823644830, 33260044947 ],\n \"samples_ts\": [ 10.9226, 4.15266, 3.84846 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:32:24Z\",\n \"avg_ns\": 254813744291,\n \"stddev_ns\": 1825539787,\n \"avg_ts\": 2.030683,\n \"stddev_ts\": 0.264569,\n \"samples_ns\": [ 219161279220, 272512634449, 272767319204 ],\n \"samples_ts\": [ 2.33618, 1.87881, 1.87706 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T16:30:59Z", "avg_ns": 25267493686, "stddev_ns": 899407290, "avg_ts": 6.307915, "stddev_ts": 3.999352, "samples_ns": [ 11718791281, 30823644830, 33260044947 ], "samples_ts": [ 10.9226, 4.15266, 3.84846 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T16:32:24Z", "avg_ns": 254813744291, "stddev_ns": 1825539787, "avg_ts": 2.030683, "stddev_ts": 0.264569, "samples_ns": [ 219161279220, 272512634449, 272767319204 ], "samples_ts": [ 2.33618, 1.87881, 1.87706 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 477 }, { "timestamp_utc": "2025-12-09T16:53:03.762074+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:45:13Z\",\n \"avg_ns\": 92535179674,\n \"stddev_ns\": 1995731193,\n \"avg_ts\": 6.037745,\n \"stddev_ts\": 2.024586,\n \"samples_ns\": [ 64856596855, 131989182945, 80759759222 ],\n \"samples_ts\": [ 7.89434, 3.87911, 6.33979 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:50:28Z\",\n \"avg_ns\": 51384424064,\n \"stddev_ns\": 4238519642,\n \"avg_ts\": 3.375039,\n \"stddev_ts\": 1.945989,\n \"samples_ns\": [ 24329581566, 36677872071, 93145818556 ],\n \"samples_ts\": [ 5.26109, 3.48984, 1.37419 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T16:45:13Z", "avg_ns": 92535179674, "stddev_ns": 1995731193, "avg_ts": 6.037745, "stddev_ts": 2.024586, "samples_ns": [ 64856596855, 131989182945, 80759759222 ], "samples_ts": [ 7.89434, 3.87911, 6.33979 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T16:50:28Z", "avg_ns": 51384424064, "stddev_ns": 4238519642, "avg_ts": 3.375039, "stddev_ts": 1.945989, "samples_ns": [ 24329581566, 36677872071, 93145818556 ], "samples_ts": [ 5.26109, 3.48984, 1.37419 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 478 }, { "timestamp_utc": "2025-12-09T17:10:39.461136+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:53:09Z\",\n \"avg_ns\": 81338725792,\n \"stddev_ns\": 3446700760,\n \"avg_ts\": 8.087837,\n \"stddev_ts\": 4.903114,\n \"samples_ns\": [ 38002421712, 74103429239, 131910326425 ],\n \"samples_ts\": [ 13.4728, 6.90926, 3.88142 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:59:16Z\",\n \"avg_ns\": 227421712470,\n \"stddev_ns\": 1548118210,\n \"avg_ts\": 2.295875,\n \"stddev_ts\": 0.378947,\n \"samples_ns\": [ 195600238480, 214057338522, 272607560410 ],\n \"samples_ts\": [ 2.61758, 2.39188, 1.87816 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T16:53:09Z", "avg_ns": 81338725792, "stddev_ns": 3446700760, "avg_ts": 8.087837, "stddev_ts": 4.903114, "samples_ns": [ 38002421712, 74103429239, 131910326425 ], "samples_ts": [ 13.4728, 6.90926, 3.88142 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T16:59:16Z", "avg_ns": 227421712470, "stddev_ns": 1548118210, "avg_ts": 2.295875, "stddev_ts": 0.378947, "samples_ns": [ 195600238480, 214057338522, 272607560410 ], "samples_ts": [ 2.61758, 2.39188, 1.87816 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 479 }, { "timestamp_utc": "2025-12-09T17:15:25.498164+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:10:42Z\",\n \"avg_ns\": 10885723431,\n \"stddev_ns\": 2588848592,\n \"avg_ts\": 12.161725,\n \"stddev_ts\": 2.543123,\n \"samples_ns\": [ 9390065185, 9392037037, 13875068073 ],\n \"samples_ts\": [ 13.6314, 13.6286, 9.22518 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:11:24Z\",\n \"avg_ns\": 79910698072,\n \"stddev_ns\": 4147118267,\n \"avg_ts\": 1.781862,\n \"stddev_ts\": 0.776096,\n \"samples_ns\": [ 96131860703, 95803668766, 47796564748 ],\n \"samples_ts\": [ 1.3315, 1.33607, 2.67802 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T17:10:42Z", "avg_ns": 10885723431, "stddev_ns": 2588848592, "avg_ts": 12.161725, "stddev_ts": 2.543123, "samples_ns": [ 9390065185, 9392037037, 13875068073 ], "samples_ts": [ 13.6314, 13.6286, 9.22518 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T17:11:24Z", "avg_ns": 79910698072, "stddev_ns": 4147118267, "avg_ts": 1.781862, "stddev_ts": 0.776096, "samples_ns": [ 96131860703, 95803668766, 47796564748 ], "samples_ts": [ 1.3315, 1.33607, 2.67802 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 480 }, { "timestamp_utc": "2025-12-09T17:28:42.581358+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:15:28Z\",\n \"avg_ns\": 9414709658,\n \"stddev_ns\": 346029,\n \"avg_ts\": 13.595746,\n \"stddev_ts\": 0.000500,\n \"samples_ns\": [ 9414964728, 9414848470, 9414315776 ],\n \"samples_ts\": [ 13.5954, 13.5955, 13.5963 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:16:06Z\",\n \"avg_ns\": 251857743399,\n \"stddev_ns\": 1941249506,\n \"avg_ts\": 2.063244,\n \"stddev_ts\": 0.319781,\n \"samples_ns\": [ 270632222255, 274430735042, 210510272902 ],\n \"samples_ts\": [ 1.89187, 1.86568, 2.43219 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T17:15:28Z", "avg_ns": 9414709658, "stddev_ns": 346029, "avg_ts": 13.595746, "stddev_ts": 0.0005, "samples_ns": [ 9414964728, 9414848470, 9414315776 ], "samples_ts": [ 13.5954, 13.5955, 13.5963 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T17:16:06Z", "avg_ns": 251857743399, "stddev_ns": 1941249506, "avg_ts": 2.063244, "stddev_ts": 0.319781, "samples_ns": [ 270632222255, 274430735042, 210510272902 ], "samples_ts": [ 1.89187, 1.86568, 2.43219 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 481 }, { "timestamp_utc": "2025-12-09T17:36:41.490444+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:28:48Z\",\n \"avg_ns\": 91109813600,\n \"stddev_ns\": 3346148868,\n \"avg_ts\": 7.464427,\n \"stddev_ts\": 5.297298,\n \"samples_ns\": [ 37802702007, 102234562500, 133292176295 ],\n \"samples_ts\": [ 13.544, 5.00809, 3.84119 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:34:56Z\",\n \"avg_ns\": 34506586286,\n \"stddev_ns\": 3955759926,\n \"avg_ts\": 3.938446,\n \"stddev_ts\": 1.157988,\n \"samples_ns\": [ 32943478147, 24990117408, 45586163304 ],\n \"samples_ts\": [ 3.88544, 5.12202, 2.80787 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T17:28:48Z", "avg_ns": 91109813600, "stddev_ns": 3346148868, "avg_ts": 7.464427, "stddev_ts": 5.297298, "samples_ns": [ 37802702007, 102234562500, 133292176295 ], "samples_ts": [ 13.544, 5.00809, 3.84119 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T17:34:56Z", "avg_ns": 34506586286, "stddev_ns": 3955759926, "avg_ts": 3.938446, "stddev_ts": 1.157988, "samples_ns": [ 32943478147, 24990117408, 45586163304 ], "samples_ts": [ 3.88544, 5.12202, 2.80787 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 482 }, { "timestamp_utc": "2025-12-09T17:55:16.656127+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:36:47Z\",\n \"avg_ns\": 78416596840,\n \"stddev_ns\": 2531973332,\n \"avg_ts\": 8.028317,\n \"stddev_ts\": 4.826137,\n \"samples_ns\": [ 86404248619, 37788568915, 111056972986 ],\n \"samples_ts\": [ 5.92563, 13.5491, 4.61025 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:42:55Z\",\n \"avg_ns\": 246619156388,\n \"stddev_ns\": 900965319,\n \"avg_ts\": 2.123577,\n \"stddev_ts\": 0.410075,\n \"samples_ns\": [ 197146975241, 270555095109, 272155398815 ],\n \"samples_ts\": [ 2.59705, 1.89241, 1.88128 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T17:36:47Z", "avg_ns": 78416596840, "stddev_ns": 2531973332, "avg_ts": 8.028317, "stddev_ts": 4.826137, "samples_ns": [ 86404248619, 37788568915, 111056972986 ], "samples_ts": [ 5.92563, 13.5491, 4.61025 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T17:42:55Z", "avg_ns": 246619156388, "stddev_ns": 900965319, "avg_ts": 2.123577, "stddev_ts": 0.410075, "samples_ns": [ 197146975241, 270555095109, 272155398815 ], "samples_ts": [ 2.59705, 1.89241, 1.88128 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 483 }, { "timestamp_utc": "2025-12-09T17:59:48.924281+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:55:19Z\",\n \"avg_ns\": 9492156893,\n \"stddev_ns\": 14063378,\n \"avg_ts\": 13.484837,\n \"stddev_ts\": 0.019995,\n \"samples_ns\": [ 9501396145, 9499102550, 9475971984 ],\n \"samples_ts\": [ 13.4717, 13.475, 13.5078 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:55:57Z\",\n \"avg_ns\": 76834633388,\n \"stddev_ns\": 585966873,\n \"avg_ts\": 2.022948,\n \"stddev_ts\": 1.206876,\n \"samples_ns\": [ 37464933534, 96428468584, 96610498048 ],\n \"samples_ts\": [ 3.41653, 1.32741, 1.32491 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T17:55:19Z", "avg_ns": 9492156893, "stddev_ns": 14063378, "avg_ts": 13.484837, "stddev_ts": 0.019995, "samples_ns": [ 9501396145, 9499102550, 9475971984 ], "samples_ts": [ 13.4717, 13.475, 13.5078 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T17:55:57Z", "avg_ns": 76834633388, "stddev_ns": 585966873, "avg_ts": 2.022948, "stddev_ts": 1.206876, "samples_ns": [ 37464933534, 96428468584, 96610498048 ], "samples_ts": [ 3.41653, 1.32741, 1.32491 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 484 }, { "timestamp_utc": "2025-12-09T18:12:22.171380+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:59:54Z\",\n \"avg_ns\": 9390429732,\n \"stddev_ns\": 18250645,\n \"avg_ts\": 13.630933,\n \"stddev_ts\": 0.026466,\n \"samples_ns\": [ 9411192901, 9383168309, 9376927988 ],\n \"samples_ts\": [ 13.6008, 13.6414, 13.6505 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:00:42Z\",\n \"avg_ns\": 233104902006,\n \"stddev_ns\": 3589804316,\n \"avg_ts\": 2.233108,\n \"stddev_ts\": 0.343064,\n \"samples_ns\": [ 274359465158, 223842550555, 201112690306 ],\n \"samples_ts\": [ 1.86616, 2.28732, 2.54584 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T17:59:54Z", "avg_ns": 9390429732, "stddev_ns": 18250645, "avg_ts": 13.630933, "stddev_ts": 0.026466, "samples_ns": [ 9411192901, 9383168309, 9376927988 ], "samples_ts": [ 13.6008, 13.6414, 13.6505 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T18:00:42Z", "avg_ns": 233104902006, "stddev_ns": 3589804316, "avg_ts": 2.233108, "stddev_ts": 0.343064, "samples_ns": [ 274359465158, 223842550555, 201112690306 ], "samples_ts": [ 1.86616, 2.28732, 2.54584 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 485 }, { "timestamp_utc": "2025-12-09T18:20:58.504123+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:12:27Z\",\n \"avg_ns\": 77418637906,\n \"stddev_ns\": 875922054,\n \"avg_ts\": 8.242948,\n \"stddev_ts\": 4.109804,\n \"samples_ns\": [ 58391918587, 42437526219, 131426468912 ],\n \"samples_ts\": [ 8.76834, 12.0648, 3.89571 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:18:31Z\",\n \"avg_ns\": 48604859754,\n \"stddev_ns\": 80541632,\n \"avg_ts\": 3.873335,\n \"stddev_ts\": 2.200240,\n \"samples_ns\": [ 96044415060, 24881350343, 24888813861 ],\n \"samples_ts\": [ 1.33272, 5.14442, 5.14287 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T18:12:27Z", "avg_ns": 77418637906, "stddev_ns": 875922054, "avg_ts": 8.242948, "stddev_ts": 4.109804, "samples_ns": [ 58391918587, 42437526219, 131426468912 ], "samples_ts": [ 8.76834, 12.0648, 3.89571 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T18:18:31Z", "avg_ns": 48604859754, "stddev_ns": 80541632, "avg_ts": 3.873335, "stddev_ts": 2.20024, "samples_ns": [ 96044415060, 24881350343, 24888813861 ], "samples_ts": [ 1.33272, 5.14442, 5.14287 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 486 }, { "timestamp_utc": "2025-12-09T18:39:37.404799+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:21:01Z\",\n \"avg_ns\": 77211911113,\n \"stddev_ns\": 1498695206,\n \"avg_ts\": 8.193771,\n \"stddev_ts\": 3.905395,\n \"samples_ns\": [ 131266843703, 55983319293, 44385570344 ],\n \"samples_ts\": [ 3.90045, 9.14558, 11.5353 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:26:23Z\",\n \"avg_ns\": 264345587564,\n \"stddev_ns\": 1855378714,\n \"avg_ts\": 1.939522,\n \"stddev_ts\": 0.088697,\n \"samples_ns\": [ 266996397033, 274704484995, 251335880666 ],\n \"samples_ts\": [ 1.91763, 1.86382, 2.03711 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T18:21:01Z", "avg_ns": 77211911113, "stddev_ns": 1498695206, "avg_ts": 8.193771, "stddev_ts": 3.905395, "samples_ns": [ 131266843703, 55983319293, 44385570344 ], "samples_ts": [ 3.90045, 9.14558, 11.5353 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T18:26:23Z", "avg_ns": 264345587564, "stddev_ns": 1855378714, "avg_ts": 1.939522, "stddev_ts": 0.088697, "samples_ns": [ 266996397033, 274704484995, 251335880666 ], "samples_ts": [ 1.91763, 1.86382, 2.03711 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 487 }, { "timestamp_utc": "2025-12-09T18:43:52.257065+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:39:43Z\",\n \"avg_ns\": 9376953104,\n \"stddev_ns\": 12026545,\n \"avg_ts\": 13.650504,\n \"stddev_ts\": 0.017496,\n \"samples_ns\": [ 9390627829, 9372208550, 9368022934 ],\n \"samples_ts\": [ 13.6306, 13.6574, 13.6635 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:40:38Z\",\n \"avg_ns\": 64232482016,\n \"stddev_ns\": 4000223275,\n \"avg_ts\": 2.762520,\n \"stddev_ts\": 2.074908,\n \"samples_ns\": [ 24919518918, 68991680358, 98786246773 ],\n \"samples_ts\": [ 5.13654, 1.8553, 1.29573 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T18:39:43Z", "avg_ns": 9376953104, "stddev_ns": 12026545, "avg_ts": 13.650504, "stddev_ts": 0.017496, "samples_ns": [ 9390627829, 9372208550, 9368022934 ], "samples_ts": [ 13.6306, 13.6574, 13.6635 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T18:40:38Z", "avg_ns": 64232482016, "stddev_ns": 4000223275, "avg_ts": 2.76252, "stddev_ts": 2.074908, "samples_ns": [ 24919518918, 68991680358, 98786246773 ], "samples_ts": [ 5.13654, 1.8553, 1.29573 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 488 }, { "timestamp_utc": "2025-12-09T18:57:03.756348+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:43:58Z\",\n \"avg_ns\": 17848818023,\n \"stddev_ns\": 4129014280,\n \"avg_ts\": 9.806760,\n \"stddev_ts\": 5.248467,\n \"samples_ns\": [ 33453298713, 10713355285, 9379800072 ],\n \"samples_ts\": [ 3.82623, 11.9477, 13.6463 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:45:26Z\",\n \"avg_ns\": 232127059417,\n \"stddev_ns\": 1490881217,\n \"avg_ts\": 2.216899,\n \"stddev_ts\": 0.191299,\n \"samples_ns\": [ 254144095733, 213789690590, 228447391929 ],\n \"samples_ts\": [ 2.01461, 2.39488, 2.24122 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T18:43:58Z", "avg_ns": 17848818023, "stddev_ns": 4129014280, "avg_ts": 9.80676, "stddev_ts": 5.248467, "samples_ns": [ 33453298713, 10713355285, 9379800072 ], "samples_ts": [ 3.82623, 11.9477, 13.6463 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T18:45:26Z", "avg_ns": 232127059417, "stddev_ns": 1490881217, "avg_ts": 2.216899, "stddev_ts": 0.191299, "samples_ns": [ 254144095733, 213789690590, 228447391929 ], "samples_ts": [ 2.01461, 2.39488, 2.24122 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 489 }, { "timestamp_utc": "2025-12-09T19:05:54.597728+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:57:09Z\",\n \"avg_ns\": 79922230521,\n \"stddev_ns\": 468720982,\n \"avg_ts\": 8.000134,\n \"stddev_ts\": 4.476962,\n \"samples_ns\": [ 71116230829, 39927006030, 128723454706 ],\n \"samples_ts\": [ 7.19948, 12.8234, 3.97752 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:03:22Z\",\n \"avg_ns\": 50398860549,\n \"stddev_ns\": 2698727985,\n \"avg_ts\": 3.517375,\n \"stddev_ts\": 1.984883,\n \"samples_ns\": [ 94335062930, 32527526961, 24333991757 ],\n \"samples_ts\": [ 1.35687, 3.93513, 5.26013 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T18:57:09Z", "avg_ns": 79922230521, "stddev_ns": 468720982, "avg_ts": 8.000134, "stddev_ts": 4.476962, "samples_ns": [ 71116230829, 39927006030, 128723454706 ], "samples_ts": [ 7.19948, 12.8234, 3.97752 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T19:03:22Z", "avg_ns": 50398860549, "stddev_ns": 2698727985, "avg_ts": 3.517375, "stddev_ts": 1.984883, "samples_ns": [ 94335062930, 32527526961, 24333991757 ], "samples_ts": [ 1.35687, 3.93513, 5.26013 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 490 }, { "timestamp_utc": "2025-12-09T19:24:07.217967+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:05:57Z\",\n \"avg_ns\": 79197851357,\n \"stddev_ns\": 2844425845,\n \"avg_ts\": 8.138231,\n \"stddev_ts\": 4.396187,\n \"samples_ns\": [ 132229507466, 64901930139, 40462116467 ],\n \"samples_ts\": [ 3.87206, 7.88883, 12.6538 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:11:16Z\",\n \"avg_ns\": 256720068835,\n \"stddev_ns\": 4170976943,\n \"avg_ts\": 2.008889,\n \"stddev_ts\": 0.215347,\n \"samples_ns\": [ 270476185599, 272870979199, 226813041709 ],\n \"samples_ts\": [ 1.89296, 1.87634, 2.25737 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T19:05:57Z", "avg_ns": 79197851357, "stddev_ns": 2844425845, "avg_ts": 8.138231, "stddev_ts": 4.396187, "samples_ns": [ 132229507466, 64901930139, 40462116467 ], "samples_ts": [ 3.87206, 7.88883, 12.6538 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T19:11:16Z", "avg_ns": 256720068835, "stddev_ns": 4170976943, "avg_ts": 2.008889, "stddev_ts": 0.215347, "samples_ns": [ 270476185599, 272870979199, 226813041709 ], "samples_ts": [ 1.89296, 1.87634, 2.25737 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 491 }, { "timestamp_utc": "2025-12-09T19:28:10.277064+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:24:12Z\",\n \"avg_ns\": 14783775025,\n \"stddev_ns\": 3729756394,\n \"avg_ts\": 10.767081,\n \"stddev_ts\": 4.993970,\n \"samples_ns\": [ 25597226838, 9379038152, 9375060087 ],\n \"samples_ts\": [ 5.00054, 13.6475, 13.6532 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:25:30Z\",\n \"avg_ns\": 53099032416,\n \"stddev_ns\": 3877037844,\n \"avg_ts\": 3.259977,\n \"stddev_ts\": 1.954512,\n \"samples_ns\": [ 24336216780, 40425945317, 94534935152 ],\n \"samples_ts\": [ 5.25965, 3.16628, 1.354 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T19:24:12Z", "avg_ns": 14783775025, "stddev_ns": 3729756394, "avg_ts": 10.767081, "stddev_ts": 4.99397, "samples_ns": [ 25597226838, 9379038152, 9375060087 ], "samples_ts": [ 5.00054, 13.6475, 13.6532 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T19:25:30Z", "avg_ns": 53099032416, "stddev_ns": 3877037844, "avg_ts": 3.259977, "stddev_ts": 1.954512, "samples_ns": [ 24336216780, 40425945317, 94534935152 ], "samples_ts": [ 5.25965, 3.16628, 1.354 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 492 }, { "timestamp_utc": "2025-12-09T19:41:03.697003+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:28:15Z\",\n \"avg_ns\": 28371889592,\n \"stddev_ns\": 4274429620,\n \"avg_ts\": 4.818602,\n \"stddev_ts\": 1.628739,\n \"samples_ns\": [ 33044980277, 32964216909, 19106471591 ],\n \"samples_ts\": [ 3.87351, 3.883, 6.6993 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:30:14Z\",\n \"avg_ns\": 216365484870,\n \"stddev_ns\": 2014020019,\n \"avg_ts\": 2.372804,\n \"stddev_ts\": 0.152272,\n \"samples_ns\": [ 217717690507, 202009017588, 229369746517 ],\n \"samples_ts\": [ 2.35167, 2.53454, 2.2322 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T19:28:15Z", "avg_ns": 28371889592, "stddev_ns": 4274429620, "avg_ts": 4.818602, "stddev_ts": 1.628739, "samples_ns": [ 33044980277, 32964216909, 19106471591 ], "samples_ts": [ 3.87351, 3.883, 6.6993 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T19:30:14Z", "avg_ns": 216365484870, "stddev_ns": 2014020019, "avg_ts": 2.372804, "stddev_ts": 0.152272, "samples_ns": [ 217717690507, 202009017588, 229369746517 ], "samples_ts": [ 2.35167, 2.53454, 2.2322 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 493 }, { "timestamp_utc": "2025-12-09T19:50:30.806882+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:41:06Z\",\n \"avg_ns\": 78281596850,\n \"stddev_ns\": 1701006234,\n \"avg_ts\": 8.219809,\n \"stddev_ts\": 4.234294,\n \"samples_ns\": [ 133059129960, 41620670497, 60164990093 ],\n \"samples_ts\": [ 3.84791, 12.3016, 8.50993 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:46:45Z\",\n \"avg_ns\": 74931220448,\n \"stddev_ns\": 4219604440,\n \"avg_ts\": 2.172391,\n \"stddev_ts\": 1.447078,\n \"samples_ns\": [ 96266883012, 95222166628, 33304611705 ],\n \"samples_ts\": [ 1.32964, 1.34422, 3.84331 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T19:41:06Z", "avg_ns": 78281596850, "stddev_ns": 1701006234, "avg_ts": 8.219809, "stddev_ts": 4.234294, "samples_ns": [ 133059129960, 41620670497, 60164990093 ], "samples_ts": [ 3.84791, 12.3016, 8.50993 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T19:46:45Z", "avg_ns": 74931220448, "stddev_ns": 4219604440, "avg_ts": 2.172391, "stddev_ts": 1.447078, "samples_ns": [ 96266883012, 95222166628, 33304611705 ], "samples_ts": [ 1.32964, 1.34422, 3.84331 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 494 }, { "timestamp_utc": "2025-12-09T20:08:03.194555+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:50:34Z\",\n \"avg_ns\": 92542093454,\n \"stddev_ns\": 1073967683,\n \"avg_ts\": 7.379262,\n \"stddev_ts\": 5.381145,\n \"samples_ns\": [ 115336545325, 124614811417, 37674923622 ],\n \"samples_ts\": [ 4.43918, 4.10866, 13.5899 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:55:49Z\",\n \"avg_ns\": 244360608485,\n \"stddev_ns\": 3669168908,\n \"avg_ts\": 2.129232,\n \"stddev_ts\": 0.340567,\n \"samples_ns\": [ 274909515854, 254417983939, 203754325663 ],\n \"samples_ts\": [ 1.86243, 2.01244, 2.51283 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T19:50:34Z", "avg_ns": 92542093454, "stddev_ns": 1073967683, "avg_ts": 7.379262, "stddev_ts": 5.381145, "samples_ns": [ 115336545325, 124614811417, 37674923622 ], "samples_ts": [ 4.43918, 4.10866, 13.5899 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T19:55:49Z", "avg_ns": 244360608485, "stddev_ns": 3669168908, "avg_ts": 2.129232, "stddev_ts": 0.340567, "samples_ns": [ 274909515854, 254417983939, 203754325663 ], "samples_ts": [ 1.86243, 2.01244, 2.51283 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 495 }, { "timestamp_utc": "2025-12-09T20:12:00.299282+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:08:08Z\",\n \"avg_ns\": 30031285329,\n \"stddev_ns\": 4165551206,\n \"avg_ts\": 4.357233,\n \"stddev_ts\": 0.830235,\n \"samples_ns\": [ 33017616804, 32997554206, 24078684979 ],\n \"samples_ts\": [ 3.87672, 3.87908, 5.3159 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:10:12Z\",\n \"avg_ns\": 35817319284,\n \"stddev_ns\": 3864848508,\n \"avg_ts\": 4.228237,\n \"stddev_ts\": 1.773684,\n \"samples_ns\": [ 24356307570, 24384514989, 58711135293 ],\n \"samples_ts\": [ 5.25531, 5.24923, 2.18017 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T20:08:08Z", "avg_ns": 30031285329, "stddev_ns": 4165551206, "avg_ts": 4.357233, "stddev_ts": 0.830235, "samples_ns": [ 33017616804, 32997554206, 24078684979 ], "samples_ts": [ 3.87672, 3.87908, 5.3159 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T20:10:12Z", "avg_ns": 35817319284, "stddev_ns": 3864848508, "avg_ts": 4.228237, "stddev_ts": 1.773684, "samples_ns": [ 24356307570, 24384514989, 58711135293 ], "samples_ts": [ 5.25531, 5.24923, 2.18017 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 496 }, { "timestamp_utc": "2025-12-09T20:25:46.237906+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:12:06Z\",\n \"avg_ns\": 33163255295,\n \"stddev_ns\": 140077894,\n \"avg_ts\": 3.859739,\n \"stddev_ts\": 0.016286,\n \"samples_ns\": [ 33036790023, 33139156850, 33313819012 ],\n \"samples_ts\": [ 3.87447, 3.8625, 3.84225 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:14:19Z\",\n \"avg_ns\": 228731772895,\n \"stddev_ns\": 2013636374,\n \"avg_ts\": 2.279634,\n \"stddev_ts\": 0.358818,\n \"samples_ns\": [ 202579911738, 209548730274, 274066676674 ],\n \"samples_ts\": [ 2.5274, 2.44335, 1.86816 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T20:12:06Z", "avg_ns": 33163255295, "stddev_ns": 140077894, "avg_ts": 3.859739, "stddev_ts": 0.016286, "samples_ns": [ 33036790023, 33139156850, 33313819012 ], "samples_ts": [ 3.87447, 3.8625, 3.84225 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T20:14:19Z", "avg_ns": 228731772895, "stddev_ns": 2013636374, "avg_ts": 2.279634, "stddev_ts": 0.358818, "samples_ns": [ 202579911738, 209548730274, 274066676674 ], "samples_ts": [ 2.5274, 2.44335, 1.86816 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 497 }, { "timestamp_utc": "2025-12-09T20:35:13.417223+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:25:49Z\",\n \"avg_ns\": 85449466866,\n \"stddev_ns\": 690564885,\n \"avg_ts\": 7.775105,\n \"stddev_ts\": 5.087227,\n \"samples_ns\": [ 131926948471, 86582204197, 37839247931 ],\n \"samples_ts\": [ 3.88094, 5.91346, 13.5309 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:31:04Z\",\n \"avg_ns\": 82746540693,\n \"stddev_ns\": 3949330913,\n \"avg_ts\": 1.572126,\n \"stddev_ts\": 0.246150,\n \"samples_ns\": [ 69911559036, 95458362429, 82869700615 ],\n \"samples_ts\": [ 1.83088, 1.3409, 1.54459 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T20:25:49Z", "avg_ns": 85449466866, "stddev_ns": 690564885, "avg_ts": 7.775105, "stddev_ts": 5.087227, "samples_ns": [ 131926948471, 86582204197, 37839247931 ], "samples_ts": [ 3.88094, 5.91346, 13.5309 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T20:31:04Z", "avg_ns": 82746540693, "stddev_ns": 3949330913, "avg_ts": 1.572126, "stddev_ts": 0.24615, "samples_ns": [ 69911559036, 95458362429, 82869700615 ], "samples_ts": [ 1.83088, 1.3409, 1.54459 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 498 }, { "timestamp_utc": "2025-12-09T20:51:19.048898+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:35:16Z\",\n \"avg_ns\": 92396363557,\n \"stddev_ns\": 1348038903,\n \"avg_ts\": 5.991810,\n \"stddev_ts\": 1.831636,\n \"samples_ns\": [ 70912348818, 131735397320, 74541344533 ],\n \"samples_ts\": [ 7.22018, 3.88658, 6.86867 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:40:31Z\",\n \"avg_ns\": 215603347158,\n \"stddev_ns\": 3731021205,\n \"avg_ts\": 2.420212,\n \"stddev_ts\": 0.387137,\n \"samples_ns\": [ 259472237799, 194097282745, 193240520931 ],\n \"samples_ts\": [ 1.97324, 2.63785, 2.64955 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T20:35:16Z", "avg_ns": 92396363557, "stddev_ns": 1348038903, "avg_ts": 5.99181, "stddev_ts": 1.831636, "samples_ns": [ 70912348818, 131735397320, 74541344533 ], "samples_ts": [ 7.22018, 3.88658, 6.86867 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T20:40:31Z", "avg_ns": 215603347158, "stddev_ns": 3731021205, "avg_ts": 2.420212, "stddev_ts": 0.387137, "samples_ns": [ 259472237799, 194097282745, 193240520931 ], "samples_ts": [ 1.97324, 2.63785, 2.64955 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 499 }, { "timestamp_utc": "2025-12-09T20:55:58.935657+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:51:24Z\",\n \"avg_ns\": 32955954123,\n \"stddev_ns\": 49572347,\n \"avg_ts\": 3.883978,\n \"stddev_ts\": 0.005838,\n \"samples_ns\": [ 33012026692, 32917953654, 32937882024 ],\n \"samples_ts\": [ 3.87737, 3.88846, 3.8861 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:53:36Z\",\n \"avg_ns\": 47054307753,\n \"stddev_ns\": 1545861244,\n \"avg_ts\": 3.935277,\n \"stddev_ts\": 2.203384,\n \"samples_ns\": [ 91998680848, 24740854720, 24423387692 ],\n \"samples_ts\": [ 1.39132, 5.17363, 5.24088 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T20:51:24Z", "avg_ns": 32955954123, "stddev_ns": 49572347, "avg_ts": 3.883978, "stddev_ts": 0.005838, "samples_ns": [ 33012026692, 32917953654, 32937882024 ], "samples_ts": [ 3.87737, 3.88846, 3.8861 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T20:53:36Z", "avg_ns": 47054307753, "stddev_ns": 1545861244, "avg_ts": 3.935277, "stddev_ts": 2.203384, "samples_ns": [ 91998680848, 24740854720, 24423387692 ], "samples_ts": [ 1.39132, 5.17363, 5.24088 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 500 }, { "timestamp_utc": "2025-12-09T21:10:14.354033+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:56:02Z\",\n \"avg_ns\": 27211918123,\n \"stddev_ns\": 4153246018,\n \"avg_ts\": 5.386227,\n \"stddev_ts\": 2.662072,\n \"samples_ns\": [ 15129804160, 33261006338, 33244943871 ],\n \"samples_ts\": [ 8.46012, 3.84835, 3.85021 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:57:33Z\",\n \"avg_ns\": 253349867797,\n \"stddev_ns\": 1488132863,\n \"avg_ts\": 2.047774,\n \"stddev_ts\": 0.299061,\n \"samples_ns\": [ 213948686743, 272910406719, 273190509930 ],\n \"samples_ts\": [ 2.3931, 1.87607, 1.87415 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T20:56:02Z", "avg_ns": 27211918123, "stddev_ns": 4153246018, "avg_ts": 5.386227, "stddev_ts": 2.662072, "samples_ns": [ 15129804160, 33261006338, 33244943871 ], "samples_ts": [ 8.46012, 3.84835, 3.85021 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T20:57:33Z", "avg_ns": 253349867797, "stddev_ns": 1488132863, "avg_ts": 2.047774, "stddev_ts": 0.299061, "samples_ns": [ 213948686743, 272910406719, 273190509930 ], "samples_ts": [ 2.3931, 1.87607, 1.87415 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 501 }, { "timestamp_utc": "2025-12-09T21:18:15.233033+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:10:17Z\",\n \"avg_ns\": 92528564756,\n \"stddev_ns\": 2033396132,\n \"avg_ts\": 5.975163,\n \"stddev_ts\": 1.810598,\n \"samples_ns\": [ 74362581688, 131617066228, 71606046354 ],\n \"samples_ts\": [ 6.88518, 3.89007, 7.15023 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:15:33Z\",\n \"avg_ns\": 53807820311,\n \"stddev_ns\": 4278952693,\n \"avg_ts\": 3.173183,\n \"stddev_ts\": 1.991954,\n \"samples_ns\": [ 24028155857, 45794456831, 91600848245 ],\n \"samples_ts\": [ 5.32708, 2.7951, 1.39737 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T21:10:17Z", "avg_ns": 92528564756, "stddev_ns": 2033396132, "avg_ts": 5.975163, "stddev_ts": 1.810598, "samples_ns": [ 74362581688, 131617066228, 71606046354 ], "samples_ts": [ 6.88518, 3.89007, 7.15023 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T21:15:33Z", "avg_ns": 53807820311, "stddev_ns": 4278952693, "avg_ts": 3.173183, "stddev_ts": 1.991954, "samples_ns": [ 24028155857, 45794456831, 91600848245 ], "samples_ts": [ 5.32708, 2.7951, 1.39737 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 502 }, { "timestamp_utc": "2025-12-09T21:35:37.730578+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:18:21Z\",\n \"avg_ns\": 83878351141,\n \"stddev_ns\": 727625177,\n \"avg_ts\": 7.871555,\n \"stddev_ts\": 5.000214,\n \"samples_ns\": [ 37972857018, 82027747826, 131634448581 ],\n \"samples_ts\": [ 13.4833, 6.24179, 3.88956 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:24:27Z\",\n \"avg_ns\": 223079286511,\n \"stddev_ns\": 1995653408,\n \"avg_ts\": 2.349857,\n \"stddev_ts\": 0.426365,\n \"samples_ns\": [ 187498120669, 210730760464, 271008978402 ],\n \"samples_ts\": [ 2.73069, 2.42964, 1.88924 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T21:18:21Z", "avg_ns": 83878351141, "stddev_ns": 727625177, "avg_ts": 7.871555, "stddev_ts": 5.000214, "samples_ns": [ 37972857018, 82027747826, 131634448581 ], "samples_ts": [ 13.4833, 6.24179, 3.88956 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_type": "gemma3 4B Q4_K - Medium", "model_size": 2483352832, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T21:24:27Z", "avg_ns": 223079286511, "stddev_ns": 1995653408, "avg_ts": 2.349857, "stddev_ts": 0.426365, "samples_ns": [ 187498120669, 210730760464, 271008978402 ], "samples_ts": [ 2.73069, 2.42964, 1.88924 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q4_K_M", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 503 }, { "timestamp_utc": "2025-12-09T21:42:12.894717+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:36:09Z\",\n \"avg_ns\": 32167273620,\n \"stddev_ns\": 4155856856,\n \"avg_ts\": 4.211321,\n \"stddev_ts\": 1.188841,\n \"samples_ns\": [ 23765347685, 30194594805, 42541878372 ],\n \"samples_ts\": [ 5.38599, 4.23917, 3.0088 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:38:10Z\",\n \"avg_ns\": 80514241765,\n \"stddev_ns\": 1201796443,\n \"avg_ts\": 1.708572,\n \"stddev_ts\": 0.503315,\n \"samples_ns\": [ 113419398211, 65081008065, 63042319021 ],\n \"samples_ts\": [ 1.12855, 1.96678, 2.03038 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T21:36:09Z", "avg_ns": 32167273620, "stddev_ns": 4155856856, "avg_ts": 4.211321, "stddev_ts": 1.188841, "samples_ns": [ 23765347685, 30194594805, 42541878372 ], "samples_ts": [ 5.38599, 4.23917, 3.0088 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T21:38:10Z", "avg_ns": 80514241765, "stddev_ns": 1201796443, "avg_ts": 1.708572, "stddev_ts": 0.503315, "samples_ns": [ 113419398211, 65081008065, 63042319021 ], "samples_ts": [ 1.12855, 1.96678, 2.03038 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 504 }, { "timestamp_utc": "2025-12-09T22:01:28.548347+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:42:13Z\",\n \"avg_ns\": 42624695862,\n \"stddev_ns\": 38116458,\n \"avg_ts\": 3.002956,\n \"stddev_ts\": 0.002685,\n \"samples_ns\": [ 42586208835, 42625449288, 42662429464 ],\n \"samples_ts\": [ 3.00567, 3.0029, 3.0003 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:45:01Z\",\n \"avg_ns\": 328679723871,\n \"stddev_ns\": 2035137287,\n \"avg_ts\": 1.557787,\n \"stddev_ts\": 0.009614,\n \"samples_ns\": [ 331005016768, 327222895240, 327811259606 ],\n \"samples_ts\": [ 1.5468, 1.56468, 1.56187 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T21:42:13Z", "avg_ns": 42624695862, "stddev_ns": 38116458, "avg_ts": 3.002956, "stddev_ts": 0.002685, "samples_ns": [ 42586208835, 42625449288, 42662429464 ], "samples_ts": [ 3.00567, 3.0029, 3.0003 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T21:45:01Z", "avg_ns": 328679723871, "stddev_ns": 2035137287, "avg_ts": 1.557787, "stddev_ts": 0.009614, "samples_ns": [ 331005016768, 327222895240, 327811259606 ], "samples_ts": [ 1.5468, 1.56468, 1.56187 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 505 }, { "timestamp_utc": "2025-12-09T22:14:26.122584+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:01:29Z\",\n \"avg_ns\": 133340041714,\n \"stddev_ns\": 1398337842,\n \"avg_ts\": 3.951586,\n \"stddev_ts\": 0.827560,\n \"samples_ns\": [ 133834043058, 105896679459, 160289402627 ],\n \"samples_ts\": [ 3.82563, 4.8349, 3.19422 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:10:22Z\",\n \"avg_ns\": 81063292196,\n \"stddev_ns\": 3636174179,\n \"avg_ts\": 1.684517,\n \"stddev_ts\": 0.478742,\n \"samples_ns\": [ 62265637867, 69051665064, 111872573658 ],\n \"samples_ts\": [ 2.05571, 1.85368, 1.14416 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T22:01:29Z", "avg_ns": 133340041714, "stddev_ns": 1398337842, "avg_ts": 3.951586, "stddev_ts": 0.82756, "samples_ns": [ 133834043058, 105896679459, 160289402627 ], "samples_ts": [ 3.82563, 4.8349, 3.19422 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T22:10:22Z", "avg_ns": 81063292196, "stddev_ns": 3636174179, "avg_ts": 1.684517, "stddev_ts": 0.478742, "samples_ns": [ 62265637867, 69051665064, 111872573658 ], "samples_ts": [ 2.05571, 1.85368, 1.14416 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 506 }, { "timestamp_utc": "2025-12-09T22:39:29.180211+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:14:27Z\",\n \"avg_ns\": 121784486920,\n \"stddev_ns\": 1488752240,\n \"avg_ts\": 4.291397,\n \"stddev_ts\": 0.774231,\n \"samples_ns\": [ 126057038775, 139965702777, 99330719208 ],\n \"samples_ts\": [ 4.06165, 3.65804, 5.1545 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:22:27Z\",\n \"avg_ns\": 340308016665,\n \"stddev_ns\": 861020332,\n \"avg_ts\": 1.504526,\n \"stddev_ts\": 0.003808,\n \"samples_ns\": [ 340371112920, 339417184057, 341135753019 ],\n \"samples_ts\": [ 1.50424, 1.50847, 1.50087 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T22:14:27Z", "avg_ns": 121784486920, "stddev_ns": 1488752240, "avg_ts": 4.291397, "stddev_ts": 0.774231, "samples_ns": [ 126057038775, 139965702777, 99330719208 ], "samples_ts": [ 4.06165, 3.65804, 5.1545 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T22:22:27Z", "avg_ns": 340308016665, "stddev_ns": 861020332, "avg_ts": 1.504526, "stddev_ts": 0.003808, "samples_ns": [ 340371112920, 339417184057, 341135753019 ], "samples_ts": [ 1.50424, 1.50847, 1.50087 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 507 }, { "timestamp_utc": "2025-12-09T22:45:46.526339+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:39:30Z\",\n \"avg_ns\": 23854739967,\n \"stddev_ns\": 56026440,\n \"avg_ts\": 5.365830,\n \"stddev_ts\": 0.012613,\n \"samples_ns\": [ 23869593834, 23792783810, 23901842259 ],\n \"samples_ts\": [ 5.36247, 5.37978, 5.35524 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:41:23Z\",\n \"avg_ns\": 87613502797,\n \"stddev_ns\": 3924213930,\n \"avg_ts\": 1.521716,\n \"stddev_ts\": 0.361709,\n \"samples_ns\": [ 68775371327, 112160635260, 81904501804 ],\n \"samples_ts\": [ 1.86113, 1.14122, 1.5628 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T22:39:30Z", "avg_ns": 23854739967, "stddev_ns": 56026440, "avg_ts": 5.36583, "stddev_ts": 0.012613, "samples_ns": [ 23869593834, 23792783810, 23901842259 ], "samples_ts": [ 5.36247, 5.37978, 5.35524 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T22:41:23Z", "avg_ns": 87613502797, "stddev_ns": 3924213930, "avg_ts": 1.521716, "stddev_ts": 0.361709, "samples_ns": [ 68775371327, 112160635260, 81904501804 ], "samples_ts": [ 1.86113, 1.14122, 1.5628 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 508 }, { "timestamp_utc": "2025-12-09T23:04:51.079634+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:45:47Z\",\n \"avg_ns\": 25102578225,\n \"stddev_ns\": 2073163681,\n \"avg_ts\": 5.121306,\n \"stddev_ts\": 0.403711,\n \"samples_ns\": [ 23918216577, 23893100580, 27496417519 ],\n \"samples_ts\": [ 5.35157, 5.3572, 4.65515 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:47:26Z\",\n \"avg_ns\": 347704207570,\n \"stddev_ns\": 555738711,\n \"avg_ts\": 1.472518,\n \"stddev_ts\": 0.002352,\n \"samples_ns\": [ 347210673520, 348306169294, 347595779897 ],\n \"samples_ts\": [ 1.47461, 1.46997, 1.47298 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T22:45:47Z", "avg_ns": 25102578225, "stddev_ns": 2073163681, "avg_ts": 5.121306, "stddev_ts": 0.403711, "samples_ns": [ 23918216577, 23893100580, 27496417519 ], "samples_ts": [ 5.35157, 5.3572, 4.65515 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T22:47:26Z", "avg_ns": 347704207570, "stddev_ns": 555738711, "avg_ts": 1.472518, "stddev_ts": 0.002352, "samples_ns": [ 347210673520, 348306169294, 347595779897 ], "samples_ts": [ 1.47461, 1.46997, 1.47298 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 509 }, { "timestamp_utc": "2025-12-09T23:17:00.834239+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:04:52Z\",\n \"avg_ns\": 125417612509,\n \"stddev_ns\": 1252279076,\n \"avg_ts\": 4.116251,\n \"stddev_ts\": 0.467935,\n \"samples_ns\": [ 136895562869, 129018237698, 110339036962 ],\n \"samples_ts\": [ 3.74008, 3.96843, 4.64024 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:12:52Z\",\n \"avg_ns\": 82538611552,\n \"stddev_ns\": 3352829829,\n \"avg_ts\": 1.650715,\n \"stddev_ts\": 0.470406,\n \"samples_ns\": [ 112490604143, 73053787752, 62071442763 ],\n \"samples_ts\": [ 1.13787, 1.75213, 2.06214 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T23:04:52Z", "avg_ns": 125417612509, "stddev_ns": 1252279076, "avg_ts": 4.116251, "stddev_ts": 0.467935, "samples_ns": [ 136895562869, 129018237698, 110339036962 ], "samples_ts": [ 3.74008, 3.96843, 4.64024 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T23:12:52Z", "avg_ns": 82538611552, "stddev_ns": 3352829829, "avg_ts": 1.650715, "stddev_ts": 0.470406, "samples_ns": [ 112490604143, 73053787752, 62071442763 ], "samples_ts": [ 1.13787, 1.75213, 2.06214 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 510 }, { "timestamp_utc": "2025-12-09T23:42:12.177911+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:17:01Z\",\n \"avg_ns\": 124649048767,\n \"stddev_ns\": 4288927930,\n \"avg_ts\": 4.145751,\n \"stddev_ts\": 0.502027,\n \"samples_ns\": [ 108528861588, 129886551837, 135531732878 ],\n \"samples_ts\": [ 4.71764, 3.9419, 3.77771 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:25:52Z\",\n \"avg_ns\": 326302174206,\n \"stddev_ns\": 206725927,\n \"avg_ts\": 1.569098,\n \"stddev_ts\": 0.000994,\n \"samples_ns\": [ 326423586293, 326419454920, 326063481407 ],\n \"samples_ts\": [ 1.56851, 1.56853, 1.57025 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T23:17:01Z", "avg_ns": 124649048767, "stddev_ns": 4288927930, "avg_ts": 4.145751, "stddev_ts": 0.502027, "samples_ns": [ 108528861588, 129886551837, 135531732878 ], "samples_ts": [ 4.71764, 3.9419, 3.77771 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T23:25:52Z", "avg_ns": 326302174206, "stddev_ns": 206725927, "avg_ts": 1.569098, "stddev_ts": 0.000994, "samples_ns": [ 326423586293, 326419454920, 326063481407 ], "samples_ts": [ 1.56851, 1.56853, 1.57025 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 511 }, { "timestamp_utc": "2025-12-09T23:48:52.536933+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:42:13Z\",\n \"avg_ns\": 42340167773,\n \"stddev_ns\": 36893067,\n \"avg_ts\": 3.023136,\n \"stddev_ts\": 0.002633,\n \"samples_ns\": [ 42381078284, 42329996554, 42309428483 ],\n \"samples_ts\": [ 3.02022, 3.02386, 3.02533 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:44:58Z\",\n \"avg_ns\": 77628223858,\n \"stddev_ns\": 2443179006,\n \"avg_ts\": 1.735322,\n \"stddev_ts\": 0.441044,\n \"samples_ns\": [ 66657237173, 62324151407, 103903282996 ],\n \"samples_ts\": [ 1.92027, 2.05378, 1.23191 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T23:42:13Z", "avg_ns": 42340167773, "stddev_ns": 36893067, "avg_ts": 3.023136, "stddev_ts": 0.002633, "samples_ns": [ 42381078284, 42329996554, 42309428483 ], "samples_ts": [ 3.02022, 3.02386, 3.02533 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-09T23:44:58Z", "avg_ns": 77628223858, "stddev_ns": 2443179006, "avg_ts": 1.735322, "stddev_ts": 0.441044, "samples_ns": [ 66657237173, 62324151407, 103903282996 ], "samples_ts": [ 1.92027, 2.05378, 1.23191 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 512 }, { "timestamp_utc": "2025-12-10T00:07:41.619978+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:48:54Z\",\n \"avg_ns\": 28551405340,\n \"stddev_ns\": 1453749642,\n \"avg_ts\": 4.703262,\n \"stddev_ts\": 1.154490,\n \"samples_ns\": [ 37980215115, 23824673631, 23849327274 ],\n \"samples_ts\": [ 3.37018, 5.37258, 5.36703 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:51:02Z\",\n \"avg_ns\": 332794221439,\n \"stddev_ns\": 4076512577,\n \"avg_ts\": 1.539562,\n \"stddev_ts\": 0.049340,\n \"samples_ns\": [ 326392884829, 326649184096, 345340595393 ],\n \"samples_ts\": [ 1.56866, 1.56743, 1.48259 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-09T23:48:54Z", "avg_ns": 28551405340, "stddev_ns": 1453749642, "avg_ts": 4.703262, "stddev_ts": 1.15449, "samples_ns": [ 37980215115, 23824673631, 23849327274 ], "samples_ts": [ 3.37018, 5.37258, 5.36703 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-09T23:51:02Z", "avg_ns": 332794221439, "stddev_ns": 4076512577, "avg_ts": 1.539562, "stddev_ts": 0.04934, "samples_ns": [ 326392884829, 326649184096, 345340595393 ], "samples_ts": [ 1.56866, 1.56743, 1.48259 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 513 }, { "timestamp_utc": "2025-12-10T00:20:53.068901+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:07:43Z\",\n \"avg_ns\": 121308078999,\n \"stddev_ns\": 4148934167,\n \"avg_ts\": 4.402918,\n \"stddev_ts\": 1.047708,\n \"samples_ns\": [ 96358077372, 157159582945, 110406576681 ],\n \"samples_ts\": [ 5.31351, 3.25784, 4.6374 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:16:28Z\",\n \"avg_ns\": 88072244241,\n \"stddev_ns\": 4043527556,\n \"avg_ts\": 1.507985,\n \"stddev_ts\": 0.330770,\n \"samples_ns\": [ 73111164567, 113149468089, 77956100067 ],\n \"samples_ts\": [ 1.75076, 1.13125, 1.64195 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T00:07:43Z", "avg_ns": 121308078999, "stddev_ns": 4148934167, "avg_ts": 4.402918, "stddev_ts": 1.047708, "samples_ns": [ 96358077372, 157159582945, 110406576681 ], "samples_ts": [ 5.31351, 3.25784, 4.6374 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T00:16:28Z", "avg_ns": 88072244241, "stddev_ns": 4043527556, "avg_ts": 1.507985, "stddev_ts": 0.33077, "samples_ns": [ 73111164567, 113149468089, 77956100067 ], "samples_ts": [ 1.75076, 1.13125, 1.64195 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 514 }, { "timestamp_utc": "2025-12-10T00:45:49.886674+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:20:54Z\",\n \"avg_ns\": 137871627848,\n \"stddev_ns\": 1760972080,\n \"avg_ts\": 3.925765,\n \"stddev_ts\": 1.211666,\n \"samples_ns\": [ 161810900842, 96186620615, 155617362089 ],\n \"samples_ts\": [ 3.16419, 5.32299, 3.29012 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:29:33Z\",\n \"avg_ns\": 325041720096,\n \"stddev_ns\": 1167182090,\n \"avg_ts\": 1.575918,\n \"stddev_ts\": 0.041392,\n \"samples_ns\": [ 335051667269, 320033882375, 320039610645 ],\n \"samples_ts\": [ 1.52812, 1.59983, 1.5998 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T00:20:54Z", "avg_ns": 137871627848, "stddev_ns": 1760972080, "avg_ts": 3.925765, "stddev_ts": 1.211666, "samples_ns": [ 161810900842, 96186620615, 155617362089 ], "samples_ts": [ 3.16419, 5.32299, 3.29012 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T00:29:33Z", "avg_ns": 325041720096, "stddev_ns": 1167182090, "avg_ts": 1.575918, "stddev_ts": 0.041392, "samples_ns": [ 335051667269, 320033882375, 320039610645 ], "samples_ts": [ 1.52812, 1.59983, 1.5998 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 515 }, { "timestamp_utc": "2025-12-10T00:51:45.225454+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:45:50Z\",\n \"avg_ns\": 25688686136,\n \"stddev_ns\": 1310123860,\n \"avg_ts\": 5.034529,\n \"stddev_ts\": 0.603370,\n \"samples_ns\": [ 23767391764, 23790790431, 29507876215 ],\n \"samples_ts\": [ 5.38553, 5.38023, 4.33782 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:47:31Z\",\n \"avg_ns\": 84160683209,\n \"stddev_ns\": 3348636954,\n \"avg_ts\": 1.610573,\n \"stddev_ts\": 0.468183,\n \"samples_ns\": [ 109848010578, 81643204168, 60990834881 ],\n \"samples_ts\": [ 1.16525, 1.5678, 2.09868 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T00:45:50Z", "avg_ns": 25688686136, "stddev_ns": 1310123860, "avg_ts": 5.034529, "stddev_ts": 0.60337, "samples_ns": [ 23767391764, 23790790431, 29507876215 ], "samples_ts": [ 5.38553, 5.38023, 4.33782 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T00:47:31Z", "avg_ns": 84160683209, "stddev_ns": 3348636954, "avg_ts": 1.610573, "stddev_ts": 0.468183, "samples_ns": [ 109848010578, 81643204168, 60990834881 ], "samples_ts": [ 1.16525, 1.5678, 2.09868 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 516 }, { "timestamp_utc": "2025-12-10T01:10:47.864724+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:51:46Z\",\n \"avg_ns\": 40334066025,\n \"stddev_ns\": 3908023539,\n \"avg_ts\": 3.194675,\n \"stddev_ts\": 0.327878,\n \"samples_ns\": [ 35821473131, 42595552216, 42585172730 ],\n \"samples_ts\": [ 3.57328, 3.00501, 3.00574 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:54:11Z\",\n \"avg_ns\": 331901987606,\n \"stddev_ns\": 1555334218,\n \"avg_ts\": 1.542988,\n \"stddev_ts\": 0.028905,\n \"samples_ns\": [ 338958340183, 329776832668, 326970789967 ],\n \"samples_ts\": [ 1.51051, 1.55257, 1.56589 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T00:51:46Z", "avg_ns": 40334066025, "stddev_ns": 3908023539, "avg_ts": 3.194675, "stddev_ts": 0.327878, "samples_ns": [ 35821473131, 42595552216, 42585172730 ], "samples_ts": [ 3.57328, 3.00501, 3.00574 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T00:54:11Z", "avg_ns": 331901987606, "stddev_ns": 1555334218, "avg_ts": 1.542988, "stddev_ts": 0.028905, "samples_ns": [ 338958340183, 329776832668, 326970789967 ], "samples_ts": [ 1.51051, 1.55257, 1.56589 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 517 }, { "timestamp_utc": "2025-12-10T01:23:28.897251+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:10:48Z\",\n \"avg_ns\": 137011934606,\n \"stddev_ns\": 3564624128,\n \"avg_ts\": 3.953170,\n \"stddev_ts\": 1.218164,\n \"samples_ns\": [ 165677108392, 95760897218, 149597798210 ],\n \"samples_ts\": [ 3.09035, 5.34665, 3.42251 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:19:20Z\",\n \"avg_ns\": 82372511743,\n \"stddev_ns\": 1992511151,\n \"avg_ts\": 1.596743,\n \"stddev_ts\": 0.333868,\n \"samples_ns\": [ 86193543157, 64904009833, 96019982241 ],\n \"samples_ts\": [ 1.48503, 1.97214, 1.33306 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T01:10:48Z", "avg_ns": 137011934606, "stddev_ns": 3564624128, "avg_ts": 3.95317, "stddev_ts": 1.218164, "samples_ns": [ 165677108392, 95760897218, 149597798210 ], "samples_ts": [ 3.09035, 5.34665, 3.42251 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T01:19:20Z", "avg_ns": 82372511743, "stddev_ns": 1992511151, "avg_ts": 1.596743, "stddev_ts": 0.333868, "samples_ns": [ 86193543157, 64904009833, 96019982241 ], "samples_ts": [ 1.48503, 1.97214, 1.33306 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 518 }, { "timestamp_utc": "2025-12-10T01:50:00.836180+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:23:30Z\",\n \"avg_ns\": 121917489901,\n \"stddev_ns\": 2396736246,\n \"avg_ts\": 4.411770,\n \"stddev_ts\": 1.130201,\n \"samples_ns\": [ 110271628447, 160813669190, 94667172068 ],\n \"samples_ts\": [ 4.64308, 3.18381, 5.40842 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:31:53Z\",\n \"avg_ns\": 362006181259,\n \"stddev_ns\": 4287783453,\n \"avg_ts\": 1.415287,\n \"stddev_ts\": 0.045232,\n \"samples_ns\": [ 348894430456, 368205623569, 368918489752 ],\n \"samples_ts\": [ 1.46749, 1.39053, 1.38784 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T01:23:30Z", "avg_ns": 121917489901, "stddev_ns": 2396736246, "avg_ts": 4.41177, "stddev_ts": 1.130201, "samples_ns": [ 110271628447, 160813669190, 94667172068 ], "samples_ts": [ 4.64308, 3.18381, 5.40842 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T01:31:53Z", "avg_ns": 362006181259, "stddev_ns": 4287783453, "avg_ts": 1.415287, "stddev_ts": 0.045232, "samples_ns": [ 348894430456, 368205623569, 368918489752 ], "samples_ts": [ 1.46749, 1.39053, 1.38784 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 519 }, { "timestamp_utc": "2025-12-10T01:56:09.921465+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:50:02Z\",\n \"avg_ns\": 23914529604,\n \"stddev_ns\": 67406711,\n \"avg_ts\": 5.352423,\n \"stddev_ts\": 0.015079,\n \"samples_ns\": [ 23985460293, 23851310972, 23906817549 ],\n \"samples_ts\": [ 5.33657, 5.36658, 5.35412 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:51:41Z\",\n \"avg_ns\": 89360823705,\n \"stddev_ns\": 4183215369,\n \"avg_ts\": 1.505041,\n \"stddev_ts\": 0.402822,\n \"samples_ns\": [ 85817048200, 115405184971, 66860237945 ],\n \"samples_ts\": [ 1.49155, 1.10914, 1.91444 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T01:50:02Z", "avg_ns": 23914529604, "stddev_ns": 67406711, "avg_ts": 5.352423, "stddev_ts": 0.015079, "samples_ns": [ 23985460293, 23851310972, 23906817549 ], "samples_ts": [ 5.33657, 5.36658, 5.35412 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T01:51:41Z", "avg_ns": 89360823705, "stddev_ns": 4183215369, "avg_ts": 1.505041, "stddev_ts": 0.402822, "samples_ns": [ 85817048200, 115405184971, 66860237945 ], "samples_ts": [ 1.49155, 1.10914, 1.91444 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 520 }, { "timestamp_utc": "2025-12-10T02:15:42.122753+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:56:10Z\",\n \"avg_ns\": 31258065704,\n \"stddev_ns\": 4267303221,\n \"avg_ts\": 4.354541,\n \"stddev_ts\": 1.215705,\n \"samples_ns\": [ 23984785938, 27049657712, 42739753463 ],\n \"samples_ts\": [ 5.33672, 4.73204, 2.99487 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:58:08Z\",\n \"avg_ns\": 350804773554,\n \"stddev_ns\": 4010224832,\n \"avg_ts\": 1.461657,\n \"stddev_ts\": 0.069682,\n \"samples_ns\": [ 361145038070, 359233249491, 332036033103 ],\n \"samples_ts\": [ 1.41771, 1.42526, 1.542 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T01:56:10Z", "avg_ns": 31258065704, "stddev_ns": 4267303221, "avg_ts": 4.354541, "stddev_ts": 1.215705, "samples_ns": [ 23984785938, 27049657712, 42739753463 ], "samples_ts": [ 5.33672, 4.73204, 2.99487 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T01:58:08Z", "avg_ns": 350804773554, "stddev_ns": 4010224832, "avg_ts": 1.461657, "stddev_ts": 0.069682, "samples_ns": [ 361145038070, 359233249491, 332036033103 ], "samples_ts": [ 1.41771, 1.42526, 1.542 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 521 }, { "timestamp_utc": "2025-12-10T02:28:15.564176+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:15:43Z\",\n \"avg_ns\": 136504320980,\n \"stddev_ns\": 1634498289,\n \"avg_ts\": 3.974207,\n \"stddev_ts\": 1.222888,\n \"samples_ns\": [ 170125008196, 95708400007, 143679554738 ],\n \"samples_ts\": [ 3.00955, 5.34958, 3.56349 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:24:08Z\",\n \"avg_ns\": 81959549376,\n \"stddev_ns\": 3982727389,\n \"avg_ts\": 1.602260,\n \"stddev_ts\": 0.329900,\n \"samples_ns\": [ 91529139242, 64552675115, 89796833772 ],\n \"samples_ts\": [ 1.39846, 1.98288, 1.42544 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T02:15:43Z", "avg_ns": 136504320980, "stddev_ns": 1634498289, "avg_ts": 3.974207, "stddev_ts": 1.222888, "samples_ns": [ 170125008196, 95708400007, 143679554738 ], "samples_ts": [ 3.00955, 5.34958, 3.56349 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T02:24:08Z", "avg_ns": 81959549376, "stddev_ns": 3982727389, "avg_ts": 1.60226, "stddev_ts": 0.3299, "samples_ns": [ 91529139242, 64552675115, 89796833772 ], "samples_ts": [ 1.39846, 1.98288, 1.42544 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 522 }, { "timestamp_utc": "2025-12-10T02:54:38.378869+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:28:17Z\",\n \"avg_ns\": 120203107970,\n \"stddev_ns\": 3928265922,\n \"avg_ts\": 4.558601,\n \"stddev_ts\": 1.307054,\n \"samples_ns\": [ 95504601837, 167848199345, 97256522729 ],\n \"samples_ts\": [ 5.361, 3.05038, 5.26443 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:36:42Z\",\n \"avg_ns\": 358406409104,\n \"stddev_ns\": 3990959541,\n \"avg_ts\": 1.430637,\n \"stddev_ts\": 0.067907,\n \"samples_ns\": [ 339286437852, 368013286642, 367919502820 ],\n \"samples_ts\": [ 1.50905, 1.39125, 1.39161 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T02:28:17Z", "avg_ns": 120203107970, "stddev_ns": 3928265922, "avg_ts": 4.558601, "stddev_ts": 1.307054, "samples_ns": [ 95504601837, 167848199345, 97256522729 ], "samples_ts": [ 5.361, 3.05038, 5.26443 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T02:36:42Z", "avg_ns": 358406409104, "stddev_ns": 3990959541, "avg_ts": 1.430637, "stddev_ts": 0.067907, "samples_ns": [ 339286437852, 368013286642, 367919502820 ], "samples_ts": [ 1.50905, 1.39125, 1.39161 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 523 }, { "timestamp_utc": "2025-12-10T03:00:55.131070+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:54:40Z\",\n \"avg_ns\": 23844174717,\n \"stddev_ns\": 75514157,\n \"avg_ts\": 5.368223,\n \"stddev_ts\": 0.017022,\n \"samples_ns\": [ 23906458432, 23865880704, 23760185017 ],\n \"samples_ts\": [ 5.3542, 5.36331, 5.38716 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:56:28Z\",\n \"avg_ns\": 88653642230,\n \"stddev_ns\": 965267315,\n \"avg_ts\": 1.497458,\n \"stddev_ts\": 0.325597,\n \"samples_ns\": [ 74392512946, 113895314803, 77673098942 ],\n \"samples_ts\": [ 1.7206, 1.12384, 1.64793 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T02:54:40Z", "avg_ns": 23844174717, "stddev_ns": 75514157, "avg_ts": 5.368223, "stddev_ts": 0.017022, "samples_ns": [ 23906458432, 23865880704, 23760185017 ], "samples_ts": [ 5.3542, 5.36331, 5.38716 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T02:56:28Z", "avg_ns": 88653642230, "stddev_ns": 965267315, "avg_ts": 1.497458, "stddev_ts": 0.325597, "samples_ns": [ 74392512946, 113895314803, 77673098942 ], "samples_ts": [ 1.7206, 1.12384, 1.64793 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 524 }, { "timestamp_utc": "2025-12-10T03:20:20.694897+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:00:56Z\",\n \"avg_ns\": 26796250638,\n \"stddev_ns\": 1244105354,\n \"avg_ts\": 4.895749,\n \"stddev_ts\": 0.884604,\n \"samples_ns\": [ 23626849941, 23724333266, 33037568709 ],\n \"samples_ts\": [ 5.41757, 5.3953, 3.87438 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:02:40Z\",\n \"avg_ns\": 353120207478,\n \"stddev_ns\": 1824229334,\n \"avg_ts\": 1.450101,\n \"stddev_ts\": 0.019308,\n \"samples_ns\": [ 355586571697, 356035801338, 347738249400 ],\n \"samples_ts\": [ 1.43987, 1.43806, 1.47237 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T03:00:56Z", "avg_ns": 26796250638, "stddev_ns": 1244105354, "avg_ts": 4.895749, "stddev_ts": 0.884604, "samples_ns": [ 23626849941, 23724333266, 33037568709 ], "samples_ts": [ 5.41757, 5.3953, 3.87438 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T03:02:40Z", "avg_ns": 353120207478, "stddev_ns": 1824229334, "avg_ts": 1.450101, "stddev_ts": 0.019308, "samples_ns": [ 355586571697, 356035801338, 347738249400 ], "samples_ts": [ 1.43987, 1.43806, 1.47237 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 525 }, { "timestamp_utc": "2025-12-10T03:32:33.403787+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:20:21Z\",\n \"avg_ns\": 132004551425,\n \"stddev_ns\": 3845424544,\n \"avg_ts\": 3.943097,\n \"stddev_ts\": 0.611297,\n \"samples_ns\": [ 154364717200, 112819492374, 128829444702 ],\n \"samples_ts\": [ 3.31682, 4.53822, 3.97425 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:28:33Z\",\n \"avg_ns\": 79424760271,\n \"stddev_ns\": 3654033158,\n \"avg_ts\": 1.691560,\n \"stddev_ts\": 0.422713,\n \"samples_ns\": [ 105260411465, 63220083033, 69793786317 ],\n \"samples_ts\": [ 1.21603, 2.02467, 1.83397 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T03:20:21Z", "avg_ns": 132004551425, "stddev_ns": 3845424544, "avg_ts": 3.943097, "stddev_ts": 0.611297, "samples_ns": [ 154364717200, 112819492374, 128829444702 ], "samples_ts": [ 3.31682, 4.53822, 3.97425 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T03:28:33Z", "avg_ns": 79424760271, "stddev_ns": 3654033158, "avg_ts": 1.69156, "stddev_ts": 0.422713, "samples_ns": [ 105260411465, 63220083033, 69793786317 ], "samples_ts": [ 1.21603, 2.02467, 1.83397 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 526 }, { "timestamp_utc": "2025-12-10T03:58:25.423292+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:32:35Z\",\n \"avg_ns\": 121275578053,\n \"stddev_ns\": 3506052197,\n \"avg_ts\": 4.377143,\n \"stddev_ts\": 0.986427,\n \"samples_ns\": [ 96320465272, 152886985928, 114619282961 ],\n \"samples_ts\": [ 5.31559, 3.34888, 4.46696 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:41:22Z\",\n \"avg_ns\": 340646272831,\n \"stddev_ns\": 3783124481,\n \"avg_ts\": 1.504323,\n \"stddev_ts\": 0.053672,\n \"samples_ns\": [ 331076452173, 336267313338, 354595052984 ],\n \"samples_ts\": [ 1.54647, 1.5226, 1.4439 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T03:32:35Z", "avg_ns": 121275578053, "stddev_ns": 3506052197, "avg_ts": 4.377143, "stddev_ts": 0.986427, "samples_ns": [ 96320465272, 152886985928, 114619282961 ], "samples_ts": [ 5.31559, 3.34888, 4.46696 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T03:41:22Z", "avg_ns": 340646272831, "stddev_ns": 3783124481, "avg_ts": 1.504323, "stddev_ts": 0.053672, "samples_ns": [ 331076452173, 336267313338, 354595052984 ], "samples_ts": [ 1.54647, 1.5226, 1.4439 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 527 }, { "timestamp_utc": "2025-12-10T04:05:15.859721+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:58:27Z\",\n \"avg_ns\": 32544519193,\n \"stddev_ns\": 1868328341,\n \"avg_ts\": 4.154948,\n \"stddev_ts\": 1.180524,\n \"samples_ns\": [ 42308215146, 31536129633, 23789212801 ],\n \"samples_ts\": [ 3.02542, 4.05884, 5.38059 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:00:47Z\",\n \"avg_ns\": 89346369359,\n \"stddev_ns\": 4117750489,\n \"avg_ts\": 1.512129,\n \"stddev_ts\": 0.452462,\n \"samples_ns\": [ 63203989522, 95448570721, 109386547835 ],\n \"samples_ts\": [ 2.02519, 1.34104, 1.17016 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T03:58:27Z", "avg_ns": 32544519193, "stddev_ns": 1868328341, "avg_ts": 4.154948, "stddev_ts": 1.180524, "samples_ns": [ 42308215146, 31536129633, 23789212801 ], "samples_ts": [ 3.02542, 4.05884, 5.38059 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T04:00:47Z", "avg_ns": 89346369359, "stddev_ns": 4117750489, "avg_ts": 1.512129, "stddev_ts": 0.452462, "samples_ns": [ 63203989522, 95448570721, 109386547835 ], "samples_ts": [ 2.02519, 1.34104, 1.17016 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 528 }, { "timestamp_utc": "2025-12-10T04:24:46.906370+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:05:16Z\",\n \"avg_ns\": 23865202003,\n \"stddev_ns\": 16860347,\n \"avg_ts\": 5.363459,\n \"stddev_ts\": 0.003789,\n \"samples_ns\": [ 23867584844, 23880742742, 23847278425 ],\n \"samples_ts\": [ 5.36292, 5.35997, 5.36749 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:06:52Z\",\n \"avg_ns\": 357922229397,\n \"stddev_ns\": 1907651210,\n \"avg_ts\": 1.431557,\n \"stddev_ts\": 0.048579,\n \"samples_ns\": [ 344186922968, 364085800723, 365493964501 ],\n \"samples_ts\": [ 1.48756, 1.40626, 1.40084 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T04:05:16Z", "avg_ns": 23865202003, "stddev_ns": 16860347, "avg_ts": 5.363459, "stddev_ts": 0.003789, "samples_ns": [ 23867584844, 23880742742, 23847278425 ], "samples_ts": [ 5.36292, 5.35997, 5.36749 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T04:06:52Z", "avg_ns": 357922229397, "stddev_ns": 1907651210, "avg_ts": 1.431557, "stddev_ts": 0.048579, "samples_ns": [ 344186922968, 364085800723, 365493964501 ], "samples_ts": [ 1.48756, 1.40626, 1.40084 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 529 }, { "timestamp_utc": "2025-12-10T04:37:05.065118+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:24:48Z\",\n \"avg_ns\": 124905727755,\n \"stddev_ns\": 4085519829,\n \"avg_ts\": 4.136988,\n \"stddev_ts\": 0.500737,\n \"samples_ns\": [ 134779581818, 131284240064, 108653361383 ],\n \"samples_ts\": [ 3.79879, 3.89993, 4.71223 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:32:48Z\",\n \"avg_ns\": 85214608074,\n \"stddev_ns\": 3908439799,\n \"avg_ts\": 1.597674,\n \"stddev_ts\": 0.451205,\n \"samples_ns\": [ 116109696605, 75062573316, 64471554303 ],\n \"samples_ts\": [ 1.10241, 1.70524, 1.98537 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T04:24:48Z", "avg_ns": 124905727755, "stddev_ns": 4085519829, "avg_ts": 4.136988, "stddev_ts": 0.500737, "samples_ns": [ 134779581818, 131284240064, 108653361383 ], "samples_ts": [ 3.79879, 3.89993, 4.71223 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T04:32:48Z", "avg_ns": 85214608074, "stddev_ns": 3908439799, "avg_ts": 1.597674, "stddev_ts": 0.451205, "samples_ns": [ 116109696605, 75062573316, 64471554303 ], "samples_ts": [ 1.10241, 1.70524, 1.98537 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 530 }, { "timestamp_utc": "2025-12-10T05:02:38.552967+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:37:06Z\",\n \"avg_ns\": 124031168738,\n \"stddev_ns\": 705627031,\n \"avg_ts\": 4.176712,\n \"stddev_ts\": 0.573801,\n \"samples_ns\": [ 105810833956, 133878563927, 132404108331 ],\n \"samples_ts\": [ 4.83882, 3.82436, 3.86695 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:45:58Z\",\n \"avg_ns\": 333039630673,\n \"stddev_ns\": 1071026171,\n \"avg_ts\": 1.537619,\n \"stddev_ts\": 0.024565,\n \"samples_ns\": [ 330454109205, 329453506130, 339211276686 ],\n \"samples_ts\": [ 1.54938, 1.55409, 1.50938 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T04:37:06Z", "avg_ns": 124031168738, "stddev_ns": 705627031, "avg_ts": 4.176712, "stddev_ts": 0.573801, "samples_ns": [ 105810833956, 133878563927, 132404108331 ], "samples_ts": [ 4.83882, 3.82436, 3.86695 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T04:45:58Z", "avg_ns": 333039630673, "stddev_ns": 1071026171, "avg_ts": 1.537619, "stddev_ts": 0.024565, "samples_ns": [ 330454109205, 329453506130, 339211276686 ], "samples_ts": [ 1.54938, 1.55409, 1.50938 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 531 }, { "timestamp_utc": "2025-12-10T05:09:30.430996+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:02:40Z\",\n \"avg_ns\": 39341939642,\n \"stddev_ns\": 3907219139,\n \"avg_ts\": 3.290950,\n \"stddev_ts\": 0.446289,\n \"samples_ns\": [ 42328298549, 42067880804, 33629639574 ],\n \"samples_ts\": [ 3.02398, 3.0427, 3.80617 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:05:20Z\",\n \"avg_ns\": 82953524588,\n \"stddev_ns\": 2617787468,\n \"avg_ts\": 1.651802,\n \"stddev_ts\": 0.480323,\n \"samples_ns\": [ 63247899940, 70249096961, 115363576864 ],\n \"samples_ts\": [ 2.02378, 1.82209, 1.10954 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T05:02:40Z", "avg_ns": 39341939642, "stddev_ns": 3907219139, "avg_ts": 3.29095, "stddev_ts": 0.446289, "samples_ns": [ 42328298549, 42067880804, 33629639574 ], "samples_ts": [ 3.02398, 3.0427, 3.80617 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T05:05:20Z", "avg_ns": 82953524588, "stddev_ns": 2617787468, "avg_ts": 1.651802, "stddev_ts": 0.480323, "samples_ns": [ 63247899940, 70249096961, 115363576864 ], "samples_ts": [ 2.02378, 1.82209, 1.10954 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 532 }, { "timestamp_utc": "2025-12-10T05:28:56.719670+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:09:32Z\",\n \"avg_ns\": 23898682357,\n \"stddev_ns\": 43351541,\n \"avg_ts\": 5.355956,\n \"stddev_ts\": 0.009725,\n \"samples_ns\": [ 23925929416, 23921425114, 23848692543 ],\n \"samples_ts\": [ 5.34984, 5.35085, 5.36717 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:11:24Z\",\n \"avg_ns\": 350540341358,\n \"stddev_ns\": 1432407006,\n \"avg_ts\": 1.462232,\n \"stddev_ts\": 0.059864,\n \"samples_ns\": [ 336041842174, 350911283331, 364667898569 ],\n \"samples_ts\": [ 1.52362, 1.45906, 1.40402 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T05:09:32Z", "avg_ns": 23898682357, "stddev_ns": 43351541, "avg_ts": 5.355956, "stddev_ts": 0.009725, "samples_ns": [ 23925929416, 23921425114, 23848692543 ], "samples_ts": [ 5.34984, 5.35085, 5.36717 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T05:11:24Z", "avg_ns": 350540341358, "stddev_ns": 1432407006, "avg_ts": 1.462232, "stddev_ts": 0.059864, "samples_ns": [ 336041842174, 350911283331, 364667898569 ], "samples_ts": [ 1.52362, 1.45906, 1.40402 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 533 }, { "timestamp_utc": "2025-12-10T05:41:36.982508+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:28:58Z\",\n \"avg_ns\": 120452968086,\n \"stddev_ns\": 1518056122,\n \"avg_ts\": 4.417620,\n \"stddev_ts\": 1.019976,\n \"samples_ns\": [ 112482247091, 153422788469, 95453868700 ],\n \"samples_ts\": [ 4.55183, 3.33718, 5.36385 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:37:06Z\",\n \"avg_ns\": 89902617973,\n \"stddev_ns\": 1500113072,\n \"avg_ts\": 1.494104,\n \"stddev_ts\": 0.425249,\n \"samples_ns\": [ 107102066487, 97989614704, 64616172728 ],\n \"samples_ts\": [ 1.19512, 1.30626, 1.98093 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T05:28:58Z", "avg_ns": 120452968086, "stddev_ns": 1518056122, "avg_ts": 4.41762, "stddev_ts": 1.019976, "samples_ns": [ 112482247091, 153422788469, 95453868700 ], "samples_ts": [ 4.55183, 3.33718, 5.36385 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T05:37:06Z", "avg_ns": 89902617973, "stddev_ns": 1500113072, "avg_ts": 1.494104, "stddev_ts": 0.425249, "samples_ns": [ 107102066487, 97989614704, 64616172728 ], "samples_ts": [ 1.19512, 1.30626, 1.98093 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 534 }, { "timestamp_utc": "2025-12-10T06:07:30.442684+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:41:37Z\",\n \"avg_ns\": 131366854821,\n \"stddev_ns\": 3954535757,\n \"avg_ts\": 3.962773,\n \"stddev_ts\": 0.615722,\n \"samples_ns\": [ 127878077313, 112355887879, 153866599273 ],\n \"samples_ts\": [ 4.00381, 4.55695, 3.32756 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:50:30Z\",\n \"avg_ns\": 339657362093,\n \"stddev_ns\": 4124570565,\n \"avg_ts\": 1.507549,\n \"stddev_ts\": 0.018183,\n \"samples_ns\": [ 336970030572, 337595771823, 344406283886 ],\n \"samples_ts\": [ 1.51942, 1.51661, 1.48662 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T05:41:37Z", "avg_ns": 131366854821, "stddev_ns": 3954535757, "avg_ts": 3.962773, "stddev_ts": 0.615722, "samples_ns": [ 127878077313, 112355887879, 153866599273 ], "samples_ts": [ 4.00381, 4.55695, 3.32756 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T05:50:30Z", "avg_ns": 339657362093, "stddev_ns": 4124570565, "avg_ts": 1.507549, "stddev_ts": 0.018183, "samples_ns": [ 336970030572, 337595771823, 344406283886 ], "samples_ts": [ 1.51942, 1.51661, 1.48662 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 535 }, { "timestamp_utc": "2025-12-10T06:14:24.869716+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:07:32Z\",\n \"avg_ns\": 40686618733,\n \"stddev_ns\": 1042078098,\n \"avg_ts\": 3.159742,\n \"stddev_ts\": 0.261251,\n \"samples_ns\": [ 42573487245, 42507075600, 36979293356 ],\n \"samples_ts\": [ 3.00657, 3.01126, 3.4614 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:10:16Z\",\n \"avg_ns\": 82439383310,\n \"stddev_ns\": 4272480171,\n \"avg_ts\": 1.661391,\n \"stddev_ts\": 0.478306,\n \"samples_ns\": [ 63946892124, 68522335415, 114848922391 ],\n \"samples_ts\": [ 2.00166, 1.868, 1.11451 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T06:07:32Z", "avg_ns": 40686618733, "stddev_ns": 1042078098, "avg_ts": 3.159742, "stddev_ts": 0.261251, "samples_ns": [ 42573487245, 42507075600, 36979293356 ], "samples_ts": [ 3.00657, 3.01126, 3.4614 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T06:10:16Z", "avg_ns": 82439383310, "stddev_ns": 4272480171, "avg_ts": 1.661391, "stddev_ts": 0.478306, "samples_ns": [ 63946892124, 68522335415, 114848922391 ], "samples_ts": [ 2.00166, 1.868, 1.11451 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 536 }, { "timestamp_utc": "2025-12-10T06:33:57.475252+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:14:26Z\",\n \"avg_ns\": 23971950023,\n \"stddev_ns\": 75897827,\n \"avg_ts\": 5.339610,\n \"stddev_ts\": 0.016888,\n \"samples_ns\": [ 24055322415, 23906871190, 23953656466 ],\n \"samples_ts\": [ 5.32107, 5.35411, 5.34365 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:16:21Z\",\n \"avg_ns\": 351791777370,\n \"stddev_ns\": 1169721955,\n \"avg_ts\": 1.457010,\n \"stddev_ts\": 0.059228,\n \"samples_ns\": [ 337532944870, 351724200822, 366118186420 ],\n \"samples_ts\": [ 1.51689, 1.45569, 1.39846 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T06:14:26Z", "avg_ns": 23971950023, "stddev_ns": 75897827, "avg_ts": 5.33961, "stddev_ts": 0.016888, "samples_ns": [ 24055322415, 23906871190, 23953656466 ], "samples_ts": [ 5.32107, 5.35411, 5.34365 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T06:16:21Z", "avg_ns": 351791777370, "stddev_ns": 1169721955, "avg_ts": 1.45701, "stddev_ts": 0.059228, "samples_ns": [ 337532944870, 351724200822, 366118186420 ], "samples_ts": [ 1.51689, 1.45569, 1.39846 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 537 }, { "timestamp_utc": "2025-12-10T06:46:46.336014+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:33:59Z\",\n \"avg_ns\": 122674889102,\n \"stddev_ns\": 3891015660,\n \"avg_ts\": 4.307246,\n \"stddev_ts\": 0.932020,\n \"samples_ns\": [ 120625275480, 150227836037, 97171555791 ],\n \"samples_ts\": [ 4.24455, 3.40816, 5.26903 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:42:15Z\",\n \"avg_ns\": 90071053466,\n \"stddev_ns\": 1178651748,\n \"avg_ts\": 1.499924,\n \"stddev_ts\": 0.436188,\n \"samples_ns\": [ 114086937700, 91388266232, 64737956467 ],\n \"samples_ts\": [ 1.12195, 1.40062, 1.9772 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T06:33:59Z", "avg_ns": 122674889102, "stddev_ns": 3891015660, "avg_ts": 4.307246, "stddev_ts": 0.93202, "samples_ns": [ 120625275480, 150227836037, 97171555791 ], "samples_ts": [ 4.24455, 3.40816, 5.26903 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T06:42:15Z", "avg_ns": 90071053466, "stddev_ns": 1178651748, "avg_ts": 1.499924, "stddev_ts": 0.436188, "samples_ns": [ 114086937700, 91388266232, 64737956467 ], "samples_ts": [ 1.12195, 1.40062, 1.9772 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 538 }, { "timestamp_utc": "2025-12-10T07:12:41.320977+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:46:47Z\",\n \"avg_ns\": 129175250753,\n \"stddev_ns\": 4248870805,\n \"avg_ts\": 3.999399,\n \"stddev_ts\": 0.448217,\n \"samples_ns\": [ 120709069528, 119782752811, 147033929921 ],\n \"samples_ts\": [ 4.2416, 4.27441, 3.48219 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:55:40Z\",\n \"avg_ns\": 339948806117,\n \"stddev_ns\": 1560117533,\n \"avg_ts\": 1.506368,\n \"stddev_ts\": 0.024088,\n \"samples_ns\": [ 336796451312, 336765668735, 346284298304 ],\n \"samples_ts\": [ 1.52021, 1.52034, 1.47855 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T06:46:47Z", "avg_ns": 129175250753, "stddev_ns": 4248870805, "avg_ts": 3.999399, "stddev_ts": 0.448217, "samples_ns": [ 120709069528, 119782752811, 147033929921 ], "samples_ts": [ 4.2416, 4.27441, 3.48219 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T06:55:40Z", "avg_ns": 339948806117, "stddev_ns": 1560117533, "avg_ts": 1.506368, "stddev_ts": 0.024088, "samples_ns": [ 336796451312, 336765668735, 346284298304 ], "samples_ts": [ 1.52021, 1.52034, 1.47855 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 539 }, { "timestamp_utc": "2025-12-10T07:19:05.143400+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:12:43Z\",\n \"avg_ns\": 36988853765,\n \"stddev_ns\": 3879791179,\n \"avg_ts\": 3.623834,\n \"stddev_ts\": 1.015625,\n \"samples_ns\": [ 42151626978, 42129235958, 26685698360 ],\n \"samples_ts\": [ 3.03666, 3.03827, 4.79658 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:15:16Z\",\n \"avg_ns\": 76144409386,\n \"stddev_ns\": 3343188169,\n \"avg_ts\": 2.268538,\n \"stddev_ts\": 1.408952,\n \"samples_ns\": [ 34022218830, 61537097825, 132873911504 ],\n \"samples_ts\": [ 3.76225, 2.08005, 0.963319 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T07:12:43Z", "avg_ns": 36988853765, "stddev_ns": 3879791179, "avg_ts": 3.623834, "stddev_ts": 1.015625, "samples_ns": [ 42151626978, 42129235958, 26685698360 ], "samples_ts": [ 3.03666, 3.03827, 4.79658 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T07:15:16Z", "avg_ns": 76144409386, "stddev_ns": 3343188169, "avg_ts": 2.268538, "stddev_ts": 1.408952, "samples_ns": [ 34022218830, 61537097825, 132873911504 ], "samples_ts": [ 3.76225, 2.08005, 0.963319 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 540 }, { "timestamp_utc": "2025-12-10T07:36:22.129779+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:19:06Z\",\n \"avg_ns\": 17138282125,\n \"stddev_ns\": 4269000930,\n \"avg_ts\": 8.580173,\n \"stddev_ts\": 3.331518,\n \"samples_ns\": [ 27042076341, 12213702836, 12159067198 ],\n \"samples_ts\": [ 4.73336, 10.48, 10.5271 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:20:40Z\",\n \"avg_ns\": 313710901778,\n \"stddev_ns\": 2646946349,\n \"avg_ts\": 1.632153,\n \"stddev_ts\": 0.013705,\n \"samples_ns\": [ 312139673132, 312226107602, 316766924602 ],\n \"samples_ts\": [ 1.64029, 1.63984, 1.61633 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T07:19:06Z", "avg_ns": 17138282125, "stddev_ns": 4269000930, "avg_ts": 8.580173, "stddev_ts": 3.331518, "samples_ns": [ 27042076341, 12213702836, 12159067198 ], "samples_ts": [ 4.73336, 10.48, 10.5271 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T07:20:40Z", "avg_ns": 313710901778, "stddev_ns": 2646946349, "avg_ts": 1.632153, "stddev_ts": 0.013705, "samples_ns": [ 312139673132, 312226107602, 316766924602 ], "samples_ts": [ 1.64029, 1.63984, 1.61633 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 541 }, { "timestamp_utc": "2025-12-10T07:48:29.806162+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:36:23Z\",\n \"avg_ns\": 115227117915,\n \"stddev_ns\": 120838052,\n \"avg_ts\": 4.873556,\n \"stddev_ts\": 1.597047,\n \"samples_ns\": [ 88892384862, 87806698020, 168982270864 ],\n \"samples_ts\": [ 5.75977, 5.83099, 3.0299 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:44:59Z\",\n \"avg_ns\": 69800315813,\n \"stddev_ns\": 2255928709,\n \"avg_ts\": 2.508003,\n \"stddev_ts\": 1.337981,\n \"samples_ns\": [ 41415423946, 37038339711, 130947183783 ],\n \"samples_ts\": [ 3.09064, 3.45588, 0.977493 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T07:36:23Z", "avg_ns": 115227117915, "stddev_ns": 120838052, "avg_ts": 4.873556, "stddev_ts": 1.597047, "samples_ns": [ 88892384862, 87806698020, 168982270864 ], "samples_ts": [ 5.75977, 5.83099, 3.0299 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T07:44:59Z", "avg_ns": 69800315813, "stddev_ns": 2255928709, "avg_ts": 2.508003, "stddev_ts": 1.337981, "samples_ns": [ 41415423946, 37038339711, 130947183783 ], "samples_ts": [ 3.09064, 3.45588, 0.977493 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 542 }, { "timestamp_utc": "2025-12-10T08:11:40.835578+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:48:31Z\",\n \"avg_ns\": 104392468396,\n \"stddev_ns\": 2478021510,\n \"avg_ts\": 5.910327,\n \"stddev_ts\": 2.857340,\n \"samples_ns\": [ 58489605129, 168464571833, 86223228226 ],\n \"samples_ts\": [ 8.75369, 3.03921, 5.93808 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:55:43Z\",\n \"avg_ns\": 318910421720,\n \"stddev_ns\": 2286255743,\n \"avg_ts\": 1.606664,\n \"stddev_ts\": 0.053202,\n \"samples_ns\": [ 312616674111, 312772954682, 331341636369 ],\n \"samples_ts\": [ 1.63779, 1.63697, 1.54523 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T07:48:31Z", "avg_ns": 104392468396, "stddev_ns": 2478021510, "avg_ts": 5.910327, "stddev_ts": 2.85734, "samples_ns": [ 58489605129, 168464571833, 86223228226 ], "samples_ts": [ 8.75369, 3.03921, 5.93808 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T07:55:43Z", "avg_ns": 318910421720, "stddev_ns": 2286255743, "avg_ts": 1.606664, "stddev_ts": 0.053202, "samples_ns": [ 312616674111, 312772954682, 331341636369 ], "samples_ts": [ 1.63779, 1.63697, 1.54523 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 543 }, { "timestamp_utc": "2025-12-10T08:18:01.362985+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:11:42Z\",\n \"avg_ns\": 42075013817,\n \"stddev_ns\": 26930277,\n \"avg_ts\": 3.042186,\n \"stddev_ts\": 0.001947,\n \"samples_ns\": [ 42105042369, 42066993277, 42053005806 ],\n \"samples_ts\": [ 3.04002, 3.04277, 3.04378 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:14:31Z\",\n \"avg_ns\": 69632816879,\n \"stddev_ns\": 4149481985,\n \"avg_ts\": 2.320977,\n \"stddev_ts\": 1.336335,\n \"samples_ns\": [ 62167873179, 33961979468, 112768597990 ],\n \"samples_ts\": [ 2.05894, 3.76892, 1.13507 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T08:11:42Z", "avg_ns": 42075013817, "stddev_ns": 26930277, "avg_ts": 3.042186, "stddev_ts": 0.001947, "samples_ns": [ 42105042369, 42066993277, 42053005806 ], "samples_ts": [ 3.04002, 3.04277, 3.04378 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T08:14:31Z", "avg_ns": 69632816879, "stddev_ns": 4149481985, "avg_ts": 2.320977, "stddev_ts": 1.336335, "samples_ns": [ 62167873179, 33961979468, 112768597990 ], "samples_ts": [ 2.05894, 3.76892, 1.13507 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 544 }, { "timestamp_utc": "2025-12-10T08:35:57.364522+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:18:03Z\",\n \"avg_ns\": 32166570258,\n \"stddev_ns\": 1923870133,\n \"avg_ts\": 5.364996,\n \"stddev_ts\": 3.992852,\n \"samples_ns\": [ 41708865764, 41959449583, 12831395429 ],\n \"samples_ts\": [ 3.06889, 3.05056, 9.97553 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:20:21Z\",\n \"avg_ns\": 311719246428,\n \"stddev_ns\": 97121137,\n \"avg_ts\": 1.642504,\n \"stddev_ts\": 0.000512,\n \"samples_ns\": [ 311831117065, 311656534370, 311670087850 ],\n \"samples_ts\": [ 1.64191, 1.64283, 1.64276 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T08:18:03Z", "avg_ns": 32166570258, "stddev_ns": 1923870133, "avg_ts": 5.364996, "stddev_ts": 3.992852, "samples_ns": [ 41708865764, 41959449583, 12831395429 ], "samples_ts": [ 3.06889, 3.05056, 9.97553 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T08:20:21Z", "avg_ns": 311719246428, "stddev_ns": 97121137, "avg_ts": 1.642504, "stddev_ts": 0.000512, "samples_ns": [ 311831117065, 311656534370, 311670087850 ], "samples_ts": [ 1.64191, 1.64283, 1.64276 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 545 }, { "timestamp_utc": "2025-12-10T08:47:46.395362+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:35:58Z\",\n \"avg_ns\": 123585610856,\n \"stddev_ns\": 4278138532,\n \"avg_ts\": 5.117701,\n \"stddev_ts\": 3.168916,\n \"samples_ns\": [ 145938910216, 58391861587, 166426060766 ],\n \"samples_ts\": [ 3.50832, 8.76835, 3.07644 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:44:15Z\",\n \"avg_ns\": 69859567999,\n \"stddev_ns\": 3960628979,\n \"avg_ts\": 2.250974,\n \"stddev_ts\": 1.328001,\n \"samples_ns\": [ 73706774079, 34049625993, 101822303925 ],\n \"samples_ts\": [ 1.73661, 3.75922, 1.25709 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T08:35:58Z", "avg_ns": 123585610856, "stddev_ns": 4278138532, "avg_ts": 5.117701, "stddev_ts": 3.168916, "samples_ns": [ 145938910216, 58391861587, 166426060766 ], "samples_ts": [ 3.50832, 8.76835, 3.07644 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T08:44:15Z", "avg_ns": 69859567999, "stddev_ns": 3960628979, "avg_ts": 2.250974, "stddev_ts": 1.328001, "samples_ns": [ 73706774079, 34049625993, 101822303925 ], "samples_ts": [ 1.73661, 3.75922, 1.25709 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 546 }, { "timestamp_utc": "2025-12-10T09:11:02.159587+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:47:48Z\",\n \"avg_ns\": 104083348198,\n \"stddev_ns\": 3871761299,\n \"avg_ts\": 6.120569,\n \"stddev_ts\": 3.829668,\n \"samples_ns\": [ 48660344791, 144260889578, 119328810226 ],\n \"samples_ts\": [ 10.5219, 3.54913, 4.29067 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:55:29Z\",\n \"avg_ns\": 310695042665,\n \"stddev_ns\": 3705977971,\n \"avg_ts\": 1.647970,\n \"stddev_ts\": 0.011310,\n \"samples_ns\": [ 311903982823, 311938476221, 308242668953 ],\n \"samples_ts\": [ 1.64153, 1.64135, 1.66103 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T08:47:48Z", "avg_ns": 104083348198, "stddev_ns": 3871761299, "avg_ts": 6.120569, "stddev_ts": 3.829668, "samples_ns": [ 48660344791, 144260889578, 119328810226 ], "samples_ts": [ 10.5219, 3.54913, 4.29067 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T08:55:29Z", "avg_ns": 310695042665, "stddev_ns": 3705977971, "avg_ts": 1.64797, "stddev_ts": 0.01131, "samples_ns": [ 311903982823, 311938476221, 308242668953 ], "samples_ts": [ 1.64153, 1.64135, 1.66103 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 547 }, { "timestamp_utc": "2025-12-10T09:17:04.044071+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:11:03Z\",\n \"avg_ns\": 41826457672,\n \"stddev_ns\": 45542451,\n \"avg_ts\": 3.060266,\n \"stddev_ts\": 0.003334,\n \"samples_ns\": [ 41775770796, 41863932129, 41839670093 ],\n \"samples_ts\": [ 3.06398, 3.05752, 3.0593 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:13:33Z\",\n \"avg_ns\": 69844136181,\n \"stddev_ns\": 4145708751,\n \"avg_ts\": 2.252154,\n \"stddev_ts\": 1.332931,\n \"samples_ns\": [ 101440355210, 33973137764, 74118915569 ],\n \"samples_ts\": [ 1.26183, 3.76768, 1.72695 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T09:11:03Z", "avg_ns": 41826457672, "stddev_ns": 45542451, "avg_ts": 3.060266, "stddev_ts": 0.003334, "samples_ns": [ 41775770796, 41863932129, 41839670093 ], "samples_ts": [ 3.06398, 3.05752, 3.0593 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T09:13:33Z", "avg_ns": 69844136181, "stddev_ns": 4145708751, "avg_ts": 2.252154, "stddev_ts": 1.332931, "samples_ns": [ 101440355210, 33973137764, 74118915569 ], "samples_ts": [ 1.26183, 3.76768, 1.72695 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 548 }, { "timestamp_utc": "2025-12-10T09:35:39.270953+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:17:05Z\",\n \"avg_ns\": 42013703862,\n \"stddev_ns\": 63471857,\n \"avg_ts\": 3.046630,\n \"stddev_ts\": 0.004603,\n \"samples_ns\": [ 42076781184, 41949844692, 42014485710 ],\n \"samples_ts\": [ 3.04206, 3.05126, 3.04657 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:19:54Z\",\n \"avg_ns\": 314661939982,\n \"stddev_ns\": 4133785063,\n \"avg_ts\": 1.627329,\n \"stddev_ts\": 0.021219,\n \"samples_ns\": [ 319430752750, 312456410402, 312098656796 ],\n \"samples_ts\": [ 1.60285, 1.63863, 1.64051 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T09:17:05Z", "avg_ns": 42013703862, "stddev_ns": 63471857, "avg_ts": 3.04663, "stddev_ts": 0.004603, "samples_ns": [ 42076781184, 41949844692, 42014485710 ], "samples_ts": [ 3.04206, 3.05126, 3.04657 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T09:19:54Z", "avg_ns": 314661939982, "stddev_ns": 4133785063, "avg_ts": 1.627329, "stddev_ts": 0.021219, "samples_ns": [ 319430752750, 312456410402, 312098656796 ], "samples_ts": [ 1.60285, 1.63863, 1.64051 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 549 }, { "timestamp_utc": "2025-12-10T09:46:29.482327+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:35:40Z\",\n \"avg_ns\": 114638464568,\n \"stddev_ns\": 2416383815,\n \"avg_ts\": 5.715659,\n \"stddev_ts\": 3.753549,\n \"samples_ns\": [ 166938908553, 51143171288, 125833313865 ],\n \"samples_ts\": [ 3.06699, 10.0111, 4.06887 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:42:59Z\",\n \"avg_ns\": 69665578939,\n \"stddev_ns\": 1613867823,\n \"avg_ts\": 2.479386,\n \"stddev_ts\": 1.388150,\n \"samples_ns\": [ 126959333986, 33987983974, 48049418859 ],\n \"samples_ts\": [ 1.0082, 3.76604, 2.66392 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T09:35:40Z", "avg_ns": 114638464568, "stddev_ns": 2416383815, "avg_ts": 5.715659, "stddev_ts": 3.753549, "samples_ns": [ 166938908553, 51143171288, 125833313865 ], "samples_ts": [ 3.06699, 10.0111, 4.06887 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T09:42:59Z", "avg_ns": 69665578939, "stddev_ns": 1613867823, "avg_ts": 2.479386, "stddev_ts": 1.38815, "samples_ns": [ 126959333986, 33987983974, 48049418859 ], "samples_ts": [ 1.0082, 3.76604, 2.66392 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 550 }, { "timestamp_utc": "2025-12-10T10:10:40.289614+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:46:31Z\",\n \"avg_ns\": 114366650448,\n \"stddev_ns\": 4269850228,\n \"avg_ts\": 4.914410,\n \"stddev_ts\": 1.647477,\n \"samples_ns\": [ 82109951003, 94130969772, 166859030570 ],\n \"samples_ts\": [ 6.23554, 5.43923, 3.06846 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:55:01Z\",\n \"avg_ns\": 312556746525,\n \"stddev_ns\": 1198303510,\n \"avg_ts\": 1.638119,\n \"stddev_ts\": 0.006267,\n \"samples_ns\": [ 313938443336, 311930055128, 311801741113 ],\n \"samples_ts\": [ 1.63089, 1.64139, 1.64207 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T09:46:31Z", "avg_ns": 114366650448, "stddev_ns": 4269850228, "avg_ts": 4.91441, "stddev_ts": 1.647477, "samples_ns": [ 82109951003, 94130969772, 166859030570 ], "samples_ts": [ 6.23554, 5.43923, 3.06846 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T09:55:01Z", "avg_ns": 312556746525, "stddev_ns": 1198303510, "avg_ts": 1.638119, "stddev_ts": 0.006267, "samples_ns": [ 313938443336, 311930055128, 311801741113 ], "samples_ts": [ 1.63089, 1.64139, 1.64207 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 551 }, { "timestamp_utc": "2025-12-10T10:16:03.977762+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:10:41Z\",\n \"avg_ns\": 28348117258,\n \"stddev_ns\": 3801870360,\n \"avg_ts\": 5.867990,\n \"stddev_ts\": 3.945503,\n \"samples_ns\": [ 12338705717, 30490585259, 42215060798 ],\n \"samples_ts\": [ 10.3739, 4.19802, 3.03209 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:12:18Z\",\n \"avg_ns\": 74734206067,\n \"stddev_ns\": 4184899326,\n \"avg_ts\": 2.313206,\n \"stddev_ts\": 1.424631,\n \"samples_ns\": [ 131140549199, 59480321484, 33581747518 ],\n \"samples_ts\": [ 0.976052, 2.15197, 3.81159 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T10:10:41Z", "avg_ns": 28348117258, "stddev_ns": 3801870360, "avg_ts": 5.86799, "stddev_ts": 3.945503, "samples_ns": [ 12338705717, 30490585259, 42215060798 ], "samples_ts": [ 10.3739, 4.19802, 3.03209 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T10:12:18Z", "avg_ns": 74734206067, "stddev_ns": 4184899326, "avg_ts": 2.313206, "stddev_ts": 1.424631, "samples_ns": [ 131140549199, 59480321484, 33581747518 ], "samples_ts": [ 0.976052, 2.15197, 3.81159 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 552 }, { "timestamp_utc": "2025-12-10T10:35:21.122986+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:16:04Z\",\n \"avg_ns\": 42245737164,\n \"stddev_ns\": 175007492,\n \"avg_ts\": 3.029926,\n \"stddev_ts\": 0.012581,\n \"samples_ns\": [ 42359790429, 42333180148, 42044240916 ],\n \"samples_ts\": [ 3.02173, 3.02363, 3.04441 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:18:40Z\",\n \"avg_ns\": 333151736012,\n \"stddev_ns\": 2321629369,\n \"avg_ts\": 1.540042,\n \"stddev_ts\": 0.087185,\n \"samples_ns\": [ 347408620263, 339624297325, 312422290449 ],\n \"samples_ts\": [ 1.47377, 1.50755, 1.63881 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T10:16:04Z", "avg_ns": 42245737164, "stddev_ns": 175007492, "avg_ts": 3.029926, "stddev_ts": 0.012581, "samples_ns": [ 42359790429, 42333180148, 42044240916 ], "samples_ts": [ 3.02173, 3.02363, 3.04441 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T10:18:40Z", "avg_ns": 333151736012, "stddev_ns": 2321629369, "avg_ts": 1.540042, "stddev_ts": 0.087185, "samples_ns": [ 347408620263, 339624297325, 312422290449 ], "samples_ts": [ 1.47377, 1.50755, 1.63881 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 553 }, { "timestamp_utc": "2025-12-10T10:45:59.850464+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:35:22Z\",\n \"avg_ns\": 114700545549,\n \"stddev_ns\": 4205011862,\n \"avg_ts\": 4.901090,\n \"stddev_ts\": 1.644093,\n \"samples_ns\": [ 167423617689, 94323923605, 82354095355 ],\n \"samples_ts\": [ 3.05811, 5.4281, 6.21706 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:41:57Z\",\n \"avg_ns\": 80286501311,\n \"stddev_ns\": 1567168385,\n \"avg_ts\": 2.150186,\n \"stddev_ts\": 1.443376,\n \"samples_ns\": [ 132772095927, 74019767652, 34067640355 ],\n \"samples_ts\": [ 0.964058, 1.72927, 3.75723 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T10:35:22Z", "avg_ns": 114700545549, "stddev_ns": 4205011862, "avg_ts": 4.90109, "stddev_ts": 1.644093, "samples_ns": [ 167423617689, 94323923605, 82354095355 ], "samples_ts": [ 3.05811, 5.4281, 6.21706 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T10:41:57Z", "avg_ns": 80286501311, "stddev_ns": 1567168385, "avg_ts": 2.150186, "stddev_ts": 1.443376, "samples_ns": [ 132772095927, 74019767652, 34067640355 ], "samples_ts": [ 0.964058, 1.72927, 3.75723 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 554 }, { "timestamp_utc": "2025-12-10T11:10:30.222646+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:46:00Z\",\n \"avg_ns\": 115532182895,\n \"stddev_ns\": 1465456826,\n \"avg_ts\": 5.362748,\n \"stddev_ts\": 3.008657,\n \"samples_ns\": [ 118948303127, 58463685247, 169184560312 ],\n \"samples_ts\": [ 4.30439, 8.75757, 3.02628 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:54:13Z\",\n \"avg_ns\": 325255961114,\n \"stddev_ns\": 4167973753,\n \"avg_ts\": 1.577770,\n \"stddev_ts\": 0.091081,\n \"samples_ns\": [ 347661956153, 314802666248, 313303260942 ],\n \"samples_ts\": [ 1.47269, 1.62642, 1.6342 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T10:46:00Z", "avg_ns": 115532182895, "stddev_ns": 1465456826, "avg_ts": 5.362748, "stddev_ts": 3.008657, "samples_ns": [ 118948303127, 58463685247, 169184560312 ], "samples_ts": [ 4.30439, 8.75757, 3.02628 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T10:54:13Z", "avg_ns": 325255961114, "stddev_ns": 4167973753, "avg_ts": 1.57777, "stddev_ts": 0.091081, "samples_ns": [ 347661956153, 314802666248, 313303260942 ], "samples_ts": [ 1.47269, 1.62642, 1.6342 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 555 }, { "timestamp_utc": "2025-12-10T11:15:54.471187+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:10:31Z\",\n \"avg_ns\": 19922996524,\n \"stddev_ns\": 4046033078,\n \"avg_ts\": 8.043971,\n \"stddev_ts\": 3.784352,\n \"samples_ns\": [ 12156994501, 12910528491, 34701466580 ],\n \"samples_ts\": [ 10.5289, 9.91439, 3.6886 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:11:43Z\",\n \"avg_ns\": 83356875210,\n \"stddev_ns\": 1516127477,\n \"avg_ts\": 2.098632,\n \"stddev_ts\": 1.517637,\n \"samples_ns\": [ 130782875283, 85831394029, 33456356318 ],\n \"samples_ts\": [ 0.978721, 1.4913, 3.82588 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T11:10:31Z", "avg_ns": 19922996524, "stddev_ns": 4046033078, "avg_ts": 8.043971, "stddev_ts": 3.784352, "samples_ns": [ 12156994501, 12910528491, 34701466580 ], "samples_ts": [ 10.5289, 9.91439, 3.6886 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T11:11:43Z", "avg_ns": 83356875210, "stddev_ns": 1516127477, "avg_ts": 2.098632, "stddev_ts": 1.517637, "samples_ns": [ 130782875283, 85831394029, 33456356318 ], "samples_ts": [ 0.978721, 1.4913, 3.82588 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 556 }, { "timestamp_utc": "2025-12-10T11:35:12.512505+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:15:55Z\",\n \"avg_ns\": 39597219152,\n \"stddev_ns\": 3170809201,\n \"avg_ts\": 3.261108,\n \"stddev_ts\": 0.386328,\n \"samples_ns\": [ 34527417332, 42147453656, 42116786469 ],\n \"samples_ts\": [ 3.7072, 3.03696, 3.03917 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:18:07Z\",\n \"avg_ns\": 341352908367,\n \"stddev_ns\": 3828608661,\n \"avg_ts\": 1.501093,\n \"stddev_ts\": 0.052031,\n \"samples_ns\": [ 348248979225, 347849481300, 327960264577 ],\n \"samples_ts\": [ 1.47021, 1.4719, 1.56116 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T11:15:55Z", "avg_ns": 39597219152, "stddev_ns": 3170809201, "avg_ts": 3.261108, "stddev_ts": 0.386328, "samples_ns": [ 34527417332, 42147453656, 42116786469 ], "samples_ts": [ 3.7072, 3.03696, 3.03917 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T11:18:07Z", "avg_ns": 341352908367, "stddev_ns": 3828608661, "avg_ts": 1.501093, "stddev_ts": 0.052031, "samples_ns": [ 348248979225, 347849481300, 327960264577 ], "samples_ts": [ 1.47021, 1.4719, 1.56116 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 557 }, { "timestamp_utc": "2025-12-10T11:45:50.163303+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:35:13Z\",\n \"avg_ns\": 106563267546,\n \"stddev_ns\": 4276650501,\n \"avg_ts\": 5.645734,\n \"stddev_ts\": 2.990336,\n \"samples_ns\": [ 145880659778, 117317267409, 56491875453 ],\n \"samples_ts\": [ 3.50972, 4.36423, 9.06325 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:41:21Z\",\n \"avg_ns\": 89053945936,\n \"stddev_ns\": 4162335331,\n \"avg_ts\": 2.005253,\n \"stddev_ts\": 1.547116,\n \"samples_ns\": [ 131813656118, 101520275951, 33827905740 ],\n \"samples_ts\": [ 0.971068, 1.26083, 3.78386 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T11:35:13Z", "avg_ns": 106563267546, "stddev_ns": 4276650501, "avg_ts": 5.645734, "stddev_ts": 2.990336, "samples_ns": [ 145880659778, 117317267409, 56491875453 ], "samples_ts": [ 3.50972, 4.36423, 9.06325 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T11:41:21Z", "avg_ns": 89053945936, "stddev_ns": 4162335331, "avg_ts": 2.005253, "stddev_ts": 1.547116, "samples_ns": [ 131813656118, 101520275951, 33827905740 ], "samples_ts": [ 0.971068, 1.26083, 3.78386 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 558 }, { "timestamp_utc": "2025-12-10T12:10:19.224259+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:45:51Z\",\n \"avg_ns\": 113711370829,\n \"stddev_ns\": 3552695697,\n \"avg_ts\": 5.845454,\n \"stddev_ts\": 4.060061,\n \"samples_ns\": [ 143672675238, 48608849088, 148852588162 ],\n \"samples_ts\": [ 3.56366, 10.5331, 3.43964 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:53:32Z\",\n \"avg_ns\": 335257716830,\n \"stddev_ns\": 805553967,\n \"avg_ts\": 1.530919,\n \"stddev_ts\": 0.094247,\n \"samples_ns\": [ 346506066603, 347022040585, 312245043303 ],\n \"samples_ts\": [ 1.47761, 1.47541, 1.63974 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T11:45:51Z", "avg_ns": 113711370829, "stddev_ns": 3552695697, "avg_ts": 5.845454, "stddev_ts": 4.060061, "samples_ns": [ 143672675238, 48608849088, 148852588162 ], "samples_ts": [ 3.56366, 10.5331, 3.43964 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T11:53:32Z", "avg_ns": 335257716830, "stddev_ns": 805553967, "avg_ts": 1.530919, "stddev_ts": 0.094247, "samples_ns": [ 346506066603, 347022040585, 312245043303 ], "samples_ts": [ 1.47761, 1.47541, 1.63974 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 559 }, { "timestamp_utc": "2025-12-10T12:15:44.318774+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:10:20Z\",\n \"avg_ns\": 12537936457,\n \"stddev_ns\": 588496656,\n \"avg_ts\": 10.223637,\n \"stddev_ts\": 0.467233,\n \"samples_ns\": [ 12186371395, 12210102275, 13217335703 ],\n \"samples_ts\": [ 10.5035, 10.4831, 9.68425 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:11:09Z\",\n \"avg_ns\": 91270160442,\n \"stddev_ns\": 3860598298,\n \"avg_ts\": 1.988421,\n \"stddev_ts\": 1.597516,\n \"samples_ns\": [ 124795318124, 115616371866, 33398791337 ],\n \"samples_ts\": [ 1.02568, 1.10711, 3.83247 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T12:10:20Z", "avg_ns": 12537936457, "stddev_ns": 588496656, "avg_ts": 10.223637, "stddev_ts": 0.467233, "samples_ns": [ 12186371395, 12210102275, 13217335703 ], "samples_ts": [ 10.5035, 10.4831, 9.68425 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T12:11:09Z", "avg_ns": 91270160442, "stddev_ns": 3860598298, "avg_ts": 1.988421, "stddev_ts": 1.597516, "samples_ns": [ 124795318124, 115616371866, 33398791337 ], "samples_ts": [ 1.02568, 1.10711, 3.83247 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 560 }, { "timestamp_utc": "2025-12-10T12:35:05.063794+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:15:45Z\",\n \"avg_ns\": 31399759114,\n \"stddev_ns\": 293801391,\n \"avg_ts\": 5.249530,\n \"stddev_ts\": 3.578793,\n \"samples_ns\": [ 13648429896, 38388170328, 42162677120 ],\n \"samples_ts\": [ 9.37837, 3.33436, 3.03586 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:17:31Z\",\n \"avg_ns\": 350615104604,\n \"stddev_ns\": 2503672059,\n \"avg_ts\": 1.460414,\n \"stddev_ts\": 0.016496,\n \"samples_ns\": [ 346096268789, 352454002274, 353295042751 ],\n \"samples_ts\": [ 1.47936, 1.45267, 1.44921 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T12:15:45Z", "avg_ns": 31399759114, "stddev_ns": 293801391, "avg_ts": 5.24953, "stddev_ts": 3.578793, "samples_ns": [ 13648429896, 38388170328, 42162677120 ], "samples_ts": [ 9.37837, 3.33436, 3.03586 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T12:17:31Z", "avg_ns": 350615104604, "stddev_ns": 2503672059, "avg_ts": 1.460414, "stddev_ts": 0.016496, "samples_ns": [ 346096268789, 352454002274, 353295042751 ], "samples_ts": [ 1.47936, 1.45267, 1.44921 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 561 }, { "timestamp_utc": "2025-12-10T12:45:44.872140+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:35:06Z\",\n \"avg_ns\": 104231768254,\n \"stddev_ns\": 3431123056,\n \"avg_ts\": 6.051484,\n \"stddev_ts\": 3.742152,\n \"samples_ns\": [ 127900939808, 135424226216, 49370138738 ],\n \"samples_ts\": [ 4.0031, 3.78071, 10.3706 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:41:07Z\",\n \"avg_ns\": 92215661288,\n \"stddev_ns\": 3254229682,\n \"avg_ts\": 1.954339,\n \"stddev_ts\": 1.556008,\n \"samples_ns\": [ 123895062274, 118626471014, 34125450577 ],\n \"samples_ts\": [ 1.03313, 1.07902, 3.75087 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T12:35:06Z", "avg_ns": 104231768254, "stddev_ns": 3431123056, "avg_ts": 6.051484, "stddev_ts": 3.742152, "samples_ns": [ 127900939808, 135424226216, 49370138738 ], "samples_ts": [ 4.0031, 3.78071, 10.3706 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T12:41:07Z", "avg_ns": 92215661288, "stddev_ns": 3254229682, "avg_ts": 1.954339, "stddev_ts": 1.556008, "samples_ns": [ 123895062274, 118626471014, 34125450577 ], "samples_ts": [ 1.03313, 1.07902, 3.75087 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 562 }, { "timestamp_utc": "2025-12-10T13:10:18.175104+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:45:45Z\",\n \"avg_ns\": 114808003795,\n \"stddev_ns\": 1454191006,\n \"avg_ts\": 5.807928,\n \"stddev_ts\": 4.042668,\n \"samples_ns\": [ 156360677061, 48900929466, 139162404859 ],\n \"samples_ts\": [ 3.27448, 10.4701, 3.67915 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:53:18Z\",\n \"avg_ns\": 339588950518,\n \"stddev_ns\": 634311477,\n \"avg_ts\": 1.511976,\n \"stddev_ts\": 0.100269,\n \"samples_ns\": [ 353169364970, 351032601425, 314564885159 ],\n \"samples_ts\": [ 1.44973, 1.45855, 1.62765 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T12:45:45Z", "avg_ns": 114808003795, "stddev_ns": 1454191006, "avg_ts": 5.807928, "stddev_ts": 4.042668, "samples_ns": [ 156360677061, 48900929466, 139162404859 ], "samples_ts": [ 3.27448, 10.4701, 3.67915 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T12:53:18Z", "avg_ns": 339588950518, "stddev_ns": 634311477, "avg_ts": 1.511976, "stddev_ts": 0.100269, "samples_ns": [ 353169364970, 351032601425, 314564885159 ], "samples_ts": [ 1.44973, 1.45855, 1.62765 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 563 }, { "timestamp_utc": "2025-12-10T13:15:45.212228+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:10:19Z\",\n \"avg_ns\": 12358525100,\n \"stddev_ns\": 392407408,\n \"avg_ts\": 10.364064,\n \"stddev_ts\": 0.323184,\n \"samples_ns\": [ 12144636941, 12119532133, 12811406228 ],\n \"samples_ts\": [ 10.5396, 10.5615, 9.9911 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:11:08Z\",\n \"avg_ns\": 92078636602,\n \"stddev_ns\": 902487694,\n \"avg_ts\": 1.958394,\n \"stddev_ts\": 1.560054,\n \"samples_ns\": [ 124489067866, 117699426673, 34047415269 ],\n \"samples_ts\": [ 1.0282, 1.08752, 3.75946 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T13:10:19Z", "avg_ns": 12358525100, "stddev_ns": 392407408, "avg_ts": 10.364064, "stddev_ts": 0.323184, "samples_ns": [ 12144636941, 12119532133, 12811406228 ], "samples_ts": [ 10.5396, 10.5615, 9.9911 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T13:11:08Z", "avg_ns": 92078636602, "stddev_ns": 902487694, "avg_ts": 1.958394, "stddev_ts": 1.560054, "samples_ns": [ 124489067866, 117699426673, 34047415269 ], "samples_ts": [ 1.0282, 1.08752, 3.75946 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 564 }, { "timestamp_utc": "2025-12-10T13:35:07.870768+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:15:46Z\",\n \"avg_ns\": 31977283537,\n \"stddev_ns\": 3976410975,\n \"avg_ts\": 5.084745,\n \"stddev_ts\": 3.374493,\n \"samples_ns\": [ 14254436556, 39574470381, 42102943674 ],\n \"samples_ts\": [ 8.97966, 3.23441, 3.04017 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:17:34Z\",\n \"avg_ns\": 350740455362,\n \"stddev_ns\": 3696040031,\n \"avg_ts\": 1.459877,\n \"stddev_ts\": 0.015462,\n \"samples_ns\": [ 353566749374, 352096741121, 346557875593 ],\n \"samples_ts\": [ 1.4481, 1.45415, 1.47739 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T13:15:46Z", "avg_ns": 31977283537, "stddev_ns": 3976410975, "avg_ts": 5.084745, "stddev_ts": 3.374493, "samples_ns": [ 14254436556, 39574470381, 42102943674 ], "samples_ts": [ 8.97966, 3.23441, 3.04017 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T13:17:34Z", "avg_ns": 350740455362, "stddev_ns": 3696040031, "avg_ts": 1.459877, "stddev_ts": 0.015462, "samples_ns": [ 353566749374, 352096741121, 346557875593 ], "samples_ts": [ 1.4481, 1.45415, 1.47739 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 565 }, { "timestamp_utc": "2025-12-10T13:45:46.979919+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:35:08Z\",\n \"avg_ns\": 105245270678,\n \"stddev_ns\": 4063170791,\n \"avg_ts\": 5.915384,\n \"stddev_ts\": 3.533786,\n \"samples_ns\": [ 137161290045, 127335883604, 51238638385 ],\n \"samples_ts\": [ 3.73283, 4.02086, 9.99246 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:41:13Z\",\n \"avg_ns\": 90911457629,\n \"stddev_ns\": 4282075788,\n \"avg_ts\": 1.990128,\n \"stddev_ts\": 1.581878,\n \"samples_ns\": [ 129914747232, 109255126637, 33564499019 ],\n \"samples_ts\": [ 0.985262, 1.17157, 3.81355 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T13:35:08Z", "avg_ns": 105245270678, "stddev_ns": 4063170791, "avg_ts": 5.915384, "stddev_ts": 3.533786, "samples_ns": [ 137161290045, 127335883604, 51238638385 ], "samples_ts": [ 3.73283, 4.02086, 9.99246 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T13:41:13Z", "avg_ns": 90911457629, "stddev_ns": 4282075788, "avg_ts": 1.990128, "stddev_ts": 1.581878, "samples_ns": [ 129914747232, 109255126637, 33564499019 ], "samples_ts": [ 0.985262, 1.17157, 3.81355 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 566 }, { "timestamp_utc": "2025-12-10T14:10:11.776053+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:45:47Z\",\n \"avg_ns\": 115481331628,\n \"stddev_ns\": 3384196718,\n \"avg_ts\": 5.796804,\n \"stddev_ts\": 4.081618,\n \"samples_ns\": [ 151580197706, 48718785717, 146145011461 ],\n \"samples_ts\": [ 3.37775, 10.5093, 3.50337 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:53:27Z\",\n \"avg_ns\": 334310186293,\n \"stddev_ns\": 2051411892,\n \"avg_ts\": 1.532495,\n \"stddev_ts\": 0.047968,\n \"samples_ns\": [ 340617688849, 339864301639, 322448568393 ],\n \"samples_ts\": [ 1.50315, 1.50648, 1.58785 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T13:45:47Z", "avg_ns": 115481331628, "stddev_ns": 3384196718, "avg_ts": 5.796804, "stddev_ts": 4.081618, "samples_ns": [ 151580197706, 48718785717, 146145011461 ], "samples_ts": [ 3.37775, 10.5093, 3.50337 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T13:53:27Z", "avg_ns": 334310186293, "stddev_ns": 2051411892, "avg_ts": 1.532495, "stddev_ts": 0.047968, "samples_ns": [ 340617688849, 339864301639, 322448568393 ], "samples_ts": [ 1.50315, 1.50648, 1.58785 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 567 }, { "timestamp_utc": "2025-12-10T14:15:39.719278+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:10:12Z\",\n \"avg_ns\": 12164330873,\n \"stddev_ns\": 25042231,\n \"avg_ts\": 10.522598,\n \"stddev_ts\": 0.021686,\n \"samples_ns\": [ 12181577680, 12175807518, 12135607422 ],\n \"samples_ts\": [ 10.5077, 10.5126, 10.5475 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:11:01Z\",\n \"avg_ns\": 92619322076,\n \"stddev_ns\": 928783954,\n \"avg_ts\": 1.917708,\n \"stddev_ts\": 1.476711,\n \"samples_ns\": [ 108472121242, 134006377726, 35379467261 ],\n \"samples_ts\": [ 1.18003, 0.955178, 3.61792 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T14:10:12Z", "avg_ns": 12164330873, "stddev_ns": 25042231, "avg_ts": 10.522598, "stddev_ts": 0.021686, "samples_ns": [ 12181577680, 12175807518, 12135607422 ], "samples_ts": [ 10.5077, 10.5126, 10.5475 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T14:11:01Z", "avg_ns": 92619322076, "stddev_ns": 928783954, "avg_ts": 1.917708, "stddev_ts": 1.476711, "samples_ns": [ 108472121242, 134006377726, 35379467261 ], "samples_ts": [ 1.18003, 0.955178, 3.61792 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 568 }, { "timestamp_utc": "2025-12-10T14:34:39.220683+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:15:40Z\",\n \"avg_ns\": 27304779469,\n \"stddev_ns\": 1780390185,\n \"avg_ts\": 6.070139,\n \"stddev_ts\": 3.955565,\n \"samples_ns\": [ 12138374841, 27675780590, 42100182977 ],\n \"samples_ts\": [ 10.5451, 4.62498, 3.04037 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:17:14Z\",\n \"avg_ns\": 347645535700,\n \"stddev_ns\": 351118265,\n \"avg_ts\": 1.472765,\n \"stddev_ts\": 0.001488,\n \"samples_ns\": [ 347245226005, 347901351555, 347790029540 ],\n \"samples_ts\": [ 1.47446, 1.47168, 1.47215 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T14:15:40Z", "avg_ns": 27304779469, "stddev_ns": 1780390185, "avg_ts": 6.070139, "stddev_ts": 3.955565, "samples_ns": [ 12138374841, 27675780590, 42100182977 ], "samples_ts": [ 10.5451, 4.62498, 3.04037 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T14:17:14Z", "avg_ns": 347645535700, "stddev_ns": 351118265, "avg_ts": 1.472765, "stddev_ts": 0.001488, "samples_ns": [ 347245226005, 347901351555, 347790029540 ], "samples_ts": [ 1.47446, 1.47168, 1.47215 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 569 }, { "timestamp_utc": "2025-12-10T14:45:37.587148+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:34:40Z\",\n \"avg_ns\": 104019300526,\n \"stddev_ns\": 4251608186,\n \"avg_ts\": 6.186438,\n \"stddev_ts\": 3.834540,\n \"samples_ns\": [ 108597442243, 154878216264, 48582243071 ],\n \"samples_ts\": [ 4.71466, 3.30582, 10.5388 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:40:59Z\",\n \"avg_ns\": 92692873657,\n \"stddev_ns\": 4176178178,\n \"avg_ts\": 1.785929,\n \"stddev_ts\": 1.190746,\n \"samples_ns\": [ 99986573451, 137406391449, 40685656073 ],\n \"samples_ts\": [ 1.28017, 0.931543, 3.14607 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T14:34:40Z", "avg_ns": 104019300526, "stddev_ns": 4251608186, "avg_ts": 6.186438, "stddev_ts": 3.83454, "samples_ns": [ 108597442243, 154878216264, 48582243071 ], "samples_ts": [ 4.71466, 3.30582, 10.5388 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T14:40:59Z", "avg_ns": 92692873657, "stddev_ns": 4176178178, "avg_ts": 1.785929, "stddev_ts": 1.190746, "samples_ns": [ 99986573451, 137406391449, 40685656073 ], "samples_ts": [ 1.28017, 0.931543, 3.14607 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 570 }, { "timestamp_utc": "2025-12-10T15:09:48.532362+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:45:38Z\",\n \"avg_ns\": 113956732426,\n \"stddev_ns\": 4023771968,\n \"avg_ts\": 5.450458,\n \"stddev_ts\": 3.085048,\n \"samples_ns\": [ 166714710193, 57295568635, 117859918451 ],\n \"samples_ts\": [ 3.07111, 8.93612, 4.34414 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:52:48Z\",\n \"avg_ns\": 339515242700,\n \"stddev_ns\": 1241267665,\n \"avg_ts\": 1.508046,\n \"stddev_ts\": 0.005516,\n \"samples_ns\": [ 340705789650, 339611123006, 338228815445 ],\n \"samples_ts\": [ 1.50276, 1.50761, 1.51377 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T14:45:38Z", "avg_ns": 113956732426, "stddev_ns": 4023771968, "avg_ts": 5.450458, "stddev_ts": 3.085048, "samples_ns": [ 166714710193, 57295568635, 117859918451 ], "samples_ts": [ 3.07111, 8.93612, 4.34414 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T14:52:48Z", "avg_ns": 339515242700, "stddev_ns": 1241267665, "avg_ts": 1.508046, "stddev_ts": 0.005516, "samples_ns": [ 340705789650, 339611123006, 338228815445 ], "samples_ts": [ 1.50276, 1.50761, 1.51377 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 571 }, { "timestamp_utc": "2025-12-10T15:15:27.907910+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:09:50Z\",\n \"avg_ns\": 12170000515,\n \"stddev_ns\": 36688025,\n \"avg_ts\": 10.517730,\n \"stddev_ts\": 0.031717,\n \"samples_ns\": [ 12132096632, 12172567753, 12205337162 ],\n \"samples_ts\": [ 10.5505, 10.5154, 10.4872 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:10:49Z\",\n \"avg_ns\": 92497267805,\n \"stddev_ns\": 4233275682,\n \"avg_ts\": 1.505948,\n \"stddev_ts\": 0.477660,\n \"samples_ns\": [ 74613244205, 133426732188, 69451827024 ],\n \"samples_ts\": [ 1.71551, 0.959328, 1.843 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T15:09:50Z", "avg_ns": 12170000515, "stddev_ns": 36688025, "avg_ts": 10.51773, "stddev_ts": 0.031717, "samples_ns": [ 12132096632, 12172567753, 12205337162 ], "samples_ts": [ 10.5505, 10.5154, 10.4872 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T15:10:49Z", "avg_ns": 92497267805, "stddev_ns": 4233275682, "avg_ts": 1.505948, "stddev_ts": 0.47766, "samples_ns": [ 74613244205, 133426732188, 69451827024 ], "samples_ts": [ 1.71551, 0.959328, 1.843 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 572 }, { "timestamp_utc": "2025-12-10T15:33:59.515358+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:15:28Z\",\n \"avg_ns\": 17672462454,\n \"stddev_ns\": 4015700185,\n \"avg_ts\": 8.486484,\n \"stddev_ts\": 3.476590,\n \"samples_ns\": [ 12224787299, 12171037071, 28621562994 ],\n \"samples_ts\": [ 10.4705, 10.5168, 4.47215 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:16:34Z\",\n \"avg_ns\": 347984130654,\n \"stddev_ns\": 586236466,\n \"avg_ts\": 1.471334,\n \"stddev_ts\": 0.002476,\n \"samples_ns\": [ 347636901135, 348660981879, 347654508948 ],\n \"samples_ts\": [ 1.4728, 1.46848, 1.47273 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T15:15:28Z", "avg_ns": 17672462454, "stddev_ns": 4015700185, "avg_ts": 8.486484, "stddev_ts": 3.47659, "samples_ns": [ 12224787299, 12171037071, 28621562994 ], "samples_ts": [ 10.4705, 10.5168, 4.47215 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T15:16:34Z", "avg_ns": 347984130654, "stddev_ns": 586236466, "avg_ts": 1.471334, "stddev_ts": 0.002476, "samples_ns": [ 347636901135, 348660981879, 347654508948 ], "samples_ts": [ 1.4728, 1.46848, 1.47273 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 573 }, { "timestamp_utc": "2025-12-10T15:45:23.988159+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:34:01Z\",\n \"avg_ns\": 104196150271,\n \"stddev_ns\": 1639481184,\n \"avg_ts\": 5.770751,\n \"stddev_ts\": 2.484954,\n \"samples_ns\": [ 81516989510, 166758273862, 64313187441 ],\n \"samples_ts\": [ 6.2809, 3.07031, 7.96104 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:40:48Z\",\n \"avg_ns\": 91742002367,\n \"stddev_ns\": 3550029161,\n \"avg_ts\": 1.513045,\n \"stddev_ts\": 0.480521,\n \"samples_ns\": [ 66826922249, 130481283740, 77917801114 ],\n \"samples_ts\": [ 1.9154, 0.980984, 1.64276 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T15:34:01Z", "avg_ns": 104196150271, "stddev_ns": 1639481184, "avg_ts": 5.770751, "stddev_ts": 2.484954, "samples_ns": [ 81516989510, 166758273862, 64313187441 ], "samples_ts": [ 6.2809, 3.07031, 7.96104 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T15:40:48Z", "avg_ns": 91742002367, "stddev_ns": 3550029161, "avg_ts": 1.513045, "stddev_ts": 0.480521, "samples_ns": [ 66826922249, 130481283740, 77917801114 ], "samples_ts": [ 1.9154, 0.980984, 1.64276 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 574 }, { "timestamp_utc": "2025-12-10T16:09:02.329621+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:45:24Z\",\n \"avg_ns\": 114687631451,\n \"stddev_ns\": 1990455047,\n \"avg_ts\": 4.885034,\n \"stddev_ts\": 1.584212,\n \"samples_ns\": [ 167440287332, 89456713809, 87165893212 ],\n \"samples_ts\": [ 3.05781, 5.72344, 5.87386 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:52:05Z\",\n \"avg_ns\": 338567880434,\n \"stddev_ns\": 620057230,\n \"avg_ts\": 1.512256,\n \"stddev_ts\": 0.002768,\n \"samples_ns\": [ 339251093080, 338411707801, 338040840423 ],\n \"samples_ts\": [ 1.50921, 1.51295, 1.51461 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T15:45:24Z", "avg_ns": 114687631451, "stddev_ns": 1990455047, "avg_ts": 4.885034, "stddev_ts": 1.584212, "samples_ns": [ 167440287332, 89456713809, 87165893212 ], "samples_ts": [ 3.05781, 5.72344, 5.87386 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_type": "gemma3 4B Q8_0", "model_size": 4123860992, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T15:52:05Z", "avg_ns": 338567880434, "stddev_ns": 620057230, "avg_ts": 1.512256, "stddev_ts": 0.002768, "samples_ns": [ 339251093080, 338411707801, 338040840423 ], "samples_ts": [ 1.50921, 1.51295, 1.51461 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q8_0", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 575 }, { "timestamp_utc": "2025-12-10T16:15:02.475707+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:09:16Z\",\n \"avg_ns\": 29148844290,\n \"stddev_ns\": 5531485,\n \"avg_ts\": 4.391255,\n \"stddev_ts\": 0.000833,\n \"samples_ns\": [ 29152693084, 29142512046, 29151327742 ],\n \"samples_ts\": [ 4.39067, 4.39221, 4.39088 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:11:34Z\",\n \"avg_ns\": 69137713024,\n \"stddev_ns\": 1980839746,\n \"avg_ts\": 1.956380,\n \"stddev_ts\": 0.602425,\n \"samples_ns\": [ 48265483342, 79508494977, 79639160754 ],\n \"samples_ts\": [ 2.652, 1.60989, 1.60725 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T16:09:16Z", "avg_ns": 29148844290, "stddev_ns": 5531485, "avg_ts": 4.391255, "stddev_ts": 0.000833, "samples_ns": [ 29152693084, 29142512046, 29151327742 ], "samples_ts": [ 4.39067, 4.39221, 4.39088 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T16:11:34Z", "avg_ns": 69137713024, "stddev_ns": 1980839746, "avg_ts": 1.95638, "stddev_ts": 0.602425, "samples_ns": [ 48265483342, 79508494977, 79639160754 ], "samples_ts": [ 2.652, 1.60989, 1.60725 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 576 }, { "timestamp_utc": "2025-12-10T16:28:51.009347+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:15:03Z\",\n \"avg_ns\": 29231107599,\n \"stddev_ns\": 59142795,\n \"avg_ts\": 4.378909,\n \"stddev_ts\": 0.008868,\n \"samples_ns\": [ 29164341878, 29276925257, 29252055662 ],\n \"samples_ts\": [ 4.38892, 4.37204, 4.37576 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:17:01Z\",\n \"avg_ns\": 236197011048,\n \"stddev_ns\": 3397857119,\n \"avg_ts\": 2.177303,\n \"stddev_ts\": 0.181479,\n \"samples_ns\": [ 246964907730, 247117954585, 214508170829 ],\n \"samples_ts\": [ 2.07317, 2.07189, 2.38686 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T16:15:03Z", "avg_ns": 29231107599, "stddev_ns": 59142795, "avg_ts": 4.378909, "stddev_ts": 0.008868, "samples_ns": [ 29164341878, 29276925257, 29252055662 ], "samples_ts": [ 4.38892, 4.37204, 4.37576 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T16:17:01Z", "avg_ns": 236197011048, "stddev_ns": 3397857119, "avg_ts": 2.177303, "stddev_ts": 0.181479, "samples_ns": [ 246964907730, 247117954585, 214508170829 ], "samples_ts": [ 2.07317, 2.07189, 2.38686 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 577 }, { "timestamp_utc": "2025-12-10T16:41:58.677050+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:28:52Z\",\n \"avg_ns\": 160583762245,\n \"stddev_ns\": 3366089379,\n \"avg_ts\": 3.211049,\n \"stddev_ts\": 0.338798,\n \"samples_ns\": [ 166166670312, 142397992518, 173186623906 ],\n \"samples_ts\": [ 3.08124, 3.59556, 2.95635 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:39:23Z\",\n \"avg_ns\": 51451459191,\n \"stddev_ns\": 1443440715,\n \"avg_ts\": 2.552551,\n \"stddev_ts\": 0.470732,\n \"samples_ns\": [ 63713198037, 45284453824, 45356725714 ],\n \"samples_ts\": [ 2.009, 2.82658, 2.82207 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T16:28:52Z", "avg_ns": 160583762245, "stddev_ns": 3366089379, "avg_ts": 3.211049, "stddev_ts": 0.338798, "samples_ns": [ 166166670312, 142397992518, 173186623906 ], "samples_ts": [ 3.08124, 3.59556, 2.95635 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T16:39:23Z", "avg_ns": 51451459191, "stddev_ns": 1443440715, "avg_ts": 2.552551, "stddev_ts": 0.470732, "samples_ns": [ 63713198037, 45284453824, 45356725714 ], "samples_ts": [ 2.009, 2.82658, 2.82207 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 578 }, { "timestamp_utc": "2025-12-10T17:05:12.189955+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:41:59Z\",\n \"avg_ns\": 142078510495,\n \"stddev_ns\": 3847687929,\n \"avg_ts\": 3.798493,\n \"stddev_ts\": 0.972715,\n \"samples_ns\": [ 117072353376, 191372956262, 117790221849 ],\n \"samples_ts\": [ 4.37336, 2.6754, 4.34671 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:52:17Z\",\n \"avg_ns\": 257965023070,\n \"stddev_ns\": 773771921,\n \"avg_ts\": 1.984777,\n \"stddev_ts\": 0.005955,\n \"samples_ns\": [ 257164470669, 258021700847, 258708897694 ],\n \"samples_ts\": [ 1.99094, 1.98433, 1.97906 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T16:41:59Z", "avg_ns": 142078510495, "stddev_ns": 3847687929, "avg_ts": 3.798493, "stddev_ts": 0.972715, "samples_ns": [ 117072353376, 191372956262, 117790221849 ], "samples_ts": [ 4.37336, 2.6754, 4.34671 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T16:52:17Z", "avg_ns": 257965023070, "stddev_ns": 773771921, "avg_ts": 1.984777, "stddev_ts": 0.005955, "samples_ns": [ 257164470669, 258021700847, 258708897694 ], "samples_ts": [ 1.99094, 1.98433, 1.97906 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 579 }, { "timestamp_utc": "2025-12-10T17:10:40.794136+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:05:12Z\",\n \"avg_ns\": 29263489202,\n \"stddev_ns\": 72454359,\n \"avg_ts\": 4.374069,\n \"stddev_ts\": 0.010818,\n \"samples_ns\": [ 29206357700, 29344985358, 29239124548 ],\n \"samples_ts\": [ 4.38261, 4.3619, 4.3777 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:07:10Z\",\n \"avg_ns\": 70071223786,\n \"stddev_ns\": 1849050261,\n \"avg_ts\": 1.889014,\n \"stddev_ts\": 0.446457,\n \"samples_ns\": [ 76407382423, 80521013756, 53285275180 ],\n \"samples_ts\": [ 1.67523, 1.58965, 2.40216 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T17:05:12Z", "avg_ns": 29263489202, "stddev_ns": 72454359, "avg_ts": 4.374069, "stddev_ts": 0.010818, "samples_ns": [ 29206357700, 29344985358, 29239124548 ], "samples_ts": [ 4.38261, 4.3619, 4.3777 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T17:07:10Z", "avg_ns": 70071223786, "stddev_ns": 1849050261, "avg_ts": 1.889014, "stddev_ts": 0.446457, "samples_ns": [ 76407382423, 80521013756, 53285275180 ], "samples_ts": [ 1.67523, 1.58965, 2.40216 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 580 }, { "timestamp_utc": "2025-12-10T17:25:09.021897+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:10:41Z\",\n \"avg_ns\": 35100270962,\n \"stddev_ns\": 1598366816,\n \"avg_ts\": 3.831382,\n \"stddev_ts\": 0.953473,\n \"samples_ns\": [ 29085943913, 29339152360, 46875716614 ],\n \"samples_ts\": [ 4.40075, 4.36277, 2.73062 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:12:56Z\",\n \"avg_ns\": 244001589812,\n \"stddev_ns\": 4264898738,\n \"avg_ts\": 2.101011,\n \"stddev_ts\": 0.092784,\n \"samples_ns\": [ 231871309673, 249811173266, 250322286498 ],\n \"samples_ts\": [ 2.20812, 2.04955, 2.04536 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T17:10:41Z", "avg_ns": 35100270962, "stddev_ns": 1598366816, "avg_ts": 3.831382, "stddev_ts": 0.953473, "samples_ns": [ 29085943913, 29339152360, 46875716614 ], "samples_ts": [ 4.40075, 4.36277, 2.73062 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T17:12:56Z", "avg_ns": 244001589812, "stddev_ns": 4264898738, "avg_ts": 2.101011, "stddev_ts": 0.092784, "samples_ns": [ 231871309673, 249811173266, 250322286498 ], "samples_ts": [ 2.20812, 2.04955, 2.04536 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 581 }, { "timestamp_utc": "2025-12-10T17:38:05.570322+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:25:09Z\",\n \"avg_ns\": 167061706463,\n \"stddev_ns\": 3436018349,\n \"avg_ts\": 3.223639,\n \"stddev_ts\": 0.948038,\n \"samples_ns\": [ 191864088608, 118565110020, 190755920762 ],\n \"samples_ts\": [ 2.66856, 4.3183, 2.68406 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:35:28Z\",\n \"avg_ns\": 52129954475,\n \"stddev_ns\": 3473918482,\n \"avg_ts\": 2.542762,\n \"stddev_ts\": 0.540655,\n \"samples_ns\": [ 44775433076, 44895051161, 66719379188 ],\n \"samples_ts\": [ 2.85871, 2.85109, 1.91848 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T17:25:09Z", "avg_ns": 167061706463, "stddev_ns": 3436018349, "avg_ts": 3.223639, "stddev_ts": 0.948038, "samples_ns": [ 191864088608, 118565110020, 190755920762 ], "samples_ts": [ 2.66856, 4.3183, 2.68406 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T17:35:28Z", "avg_ns": 52129954475, "stddev_ns": 3473918482, "avg_ts": 2.542762, "stddev_ts": 0.540655, "samples_ns": [ 44775433076, 44895051161, 66719379188 ], "samples_ts": [ 2.85871, 2.85109, 1.91848 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 582 }, { "timestamp_utc": "2025-12-10T18:00:21.892874+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:38:07Z\",\n \"avg_ns\": 153993016899,\n \"stddev_ns\": 4075979347,\n \"avg_ts\": 3.330654,\n \"stddev_ts\": 0.170026,\n \"samples_ns\": [ 146566696624, 162328708059, 153083646015 ],\n \"samples_ts\": [ 3.49329, 3.15409, 3.34458 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:48:38Z\",\n \"avg_ns\": 234273877733,\n \"stddev_ns\": 4254926311,\n \"avg_ts\": 2.197664,\n \"stddev_ts\": 0.205791,\n \"samples_ns\": [ 210241869219, 246345236902, 246234527080 ],\n \"samples_ts\": [ 2.43529, 2.07838, 2.07932 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T17:38:07Z", "avg_ns": 153993016899, "stddev_ns": 4075979347, "avg_ts": 3.330654, "stddev_ts": 0.170026, "samples_ns": [ 146566696624, 162328708059, 153083646015 ], "samples_ts": [ 3.49329, 3.15409, 3.34458 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T17:48:38Z", "avg_ns": 234273877733, "stddev_ns": 4254926311, "avg_ts": 2.197664, "stddev_ts": 0.205791, "samples_ns": [ 210241869219, 246345236902, 246234527080 ], "samples_ts": [ 2.43529, 2.07838, 2.07932 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 583 }, { "timestamp_utc": "2025-12-10T18:05:51.125178+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:00:22Z\",\n \"avg_ns\": 31070754584,\n \"stddev_ns\": 2556392572,\n \"avg_ts\": 4.137456,\n \"stddev_ts\": 0.324997,\n \"samples_ns\": [ 29635184193, 29554821753, 34022257806 ],\n \"samples_ts\": [ 4.31919, 4.33093, 3.76224 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:02:25Z\",\n \"avg_ns\": 68259534222,\n \"stddev_ns\": 4092100122,\n \"avg_ts\": 2.015621,\n \"stddev_ts\": 0.715223,\n \"samples_ns\": [ 79803253964, 79928540435, 45046808269 ],\n \"samples_ts\": [ 1.60394, 1.60143, 2.84149 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T18:00:22Z", "avg_ns": 31070754584, "stddev_ns": 2556392572, "avg_ts": 4.137456, "stddev_ts": 0.324997, "samples_ns": [ 29635184193, 29554821753, 34022257806 ], "samples_ts": [ 4.31919, 4.33093, 3.76224 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T18:02:25Z", "avg_ns": 68259534222, "stddev_ns": 4092100122, "avg_ts": 2.015621, "stddev_ts": 0.715223, "samples_ns": [ 79803253964, 79928540435, 45046808269 ], "samples_ts": [ 1.60394, 1.60143, 2.84149 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 584 }, { "timestamp_utc": "2025-12-10T18:20:14.260910+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:05:51Z\",\n \"avg_ns\": 37876261177,\n \"stddev_ns\": 4013651336,\n \"avg_ts\": 3.587677,\n \"stddev_ts\": 0.985454,\n \"samples_ns\": [ 29366359272, 32595868261, 51666555999 ],\n \"samples_ts\": [ 4.35873, 3.92688, 2.47742 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:08:14Z\",\n \"avg_ns\": 239547716073,\n \"stddev_ns\": 1101134169,\n \"avg_ts\": 2.143352,\n \"stddev_ts\": 0.141400,\n \"samples_ns\": [ 221973109854, 248718046030, 247951992335 ],\n \"samples_ts\": [ 2.30659, 2.05856, 2.06492 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T18:05:51Z", "avg_ns": 37876261177, "stddev_ns": 4013651336, "avg_ts": 3.587677, "stddev_ts": 0.985454, "samples_ns": [ 29366359272, 32595868261, 51666555999 ], "samples_ts": [ 4.35873, 3.92688, 2.47742 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T18:08:14Z", "avg_ns": 239547716073, "stddev_ns": 1101134169, "avg_ts": 2.143352, "stddev_ts": 0.1414, "samples_ns": [ 221973109854, 248718046030, 247951992335 ], "samples_ts": [ 2.30659, 2.05856, 2.06492 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 585 }, { "timestamp_utc": "2025-12-10T18:33:07.033883+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:20:15Z\",\n \"avg_ns\": 167365779701,\n \"stddev_ns\": 1513785278,\n \"avg_ts\": 3.184489,\n \"stddev_ts\": 0.829035,\n \"samples_ns\": [ 192050827512, 123652506768, 186394004823 ],\n \"samples_ts\": [ 2.66596, 4.14064, 2.74687 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:30:35Z\",\n \"avg_ns\": 50376340816,\n \"stddev_ns\": 4091904551,\n \"avg_ts\": 2.645667,\n \"stddev_ts\": 0.601094,\n \"samples_ns\": [ 42767315692, 42773923035, 65587783723 ],\n \"samples_ts\": [ 2.99294, 2.99248, 1.95158 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T18:20:15Z", "avg_ns": 167365779701, "stddev_ns": 1513785278, "avg_ts": 3.184489, "stddev_ts": 0.829035, "samples_ns": [ 192050827512, 123652506768, 186394004823 ], "samples_ts": [ 2.66596, 4.14064, 2.74687 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T18:30:35Z", "avg_ns": 50376340816, "stddev_ns": 4091904551, "avg_ts": 2.645667, "stddev_ts": 0.601094, "samples_ns": [ 42767315692, 42773923035, 65587783723 ], "samples_ts": [ 2.99294, 2.99248, 1.95158 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 586 }, { "timestamp_utc": "2025-12-10T18:56:06.236679+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:33:08Z\",\n \"avg_ns\": 155055719171,\n \"stddev_ns\": 2863146145,\n \"avg_ts\": 3.306190,\n \"stddev_ts\": 0.143822,\n \"samples_ns\": [ 148164549366, 161579441178, 155423166969 ],\n \"samples_ts\": [ 3.45562, 3.16872, 3.29423 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:43:42Z\",\n \"avg_ns\": 247631291769,\n \"stddev_ns\": 3496395705,\n \"avg_ts\": 2.073882,\n \"stddev_ts\": 0.142136,\n \"samples_ns\": [ 229106247484, 253274939689, 260512688134 ],\n \"samples_ts\": [ 2.23477, 2.02152, 1.96536 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T18:33:08Z", "avg_ns": 155055719171, "stddev_ns": 2863146145, "avg_ts": 3.30619, "stddev_ts": 0.143822, "samples_ns": [ 148164549366, 161579441178, 155423166969 ], "samples_ts": [ 3.45562, 3.16872, 3.29423 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T18:43:42Z", "avg_ns": 247631291769, "stddev_ns": 3496395705, "avg_ts": 2.073882, "stddev_ts": 0.142136, "samples_ns": [ 229106247484, 253274939689, 260512688134 ], "samples_ts": [ 2.23477, 2.02152, 1.96536 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 587 }, { "timestamp_utc": "2025-12-10T19:01:35.280149+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:56:07Z\",\n \"avg_ns\": 41713328628,\n \"stddev_ns\": 3625890262,\n \"avg_ts\": 3.251993,\n \"stddev_ts\": 1.003260,\n \"samples_ns\": [ 29214415770, 43977336569, 51948233546 ],\n \"samples_ts\": [ 4.3814, 2.91059, 2.46399 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:58:41Z\",\n \"avg_ns\": 57723258993,\n \"stddev_ns\": 3811866383,\n \"avg_ts\": 2.391182,\n \"stddev_ts\": 0.718134,\n \"samples_ns\": [ 81903781436, 46164213097, 45101782448 ],\n \"samples_ts\": [ 1.56281, 2.77271, 2.83803 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T18:56:07Z", "avg_ns": 41713328628, "stddev_ns": 3625890262, "avg_ts": 3.251993, "stddev_ts": 1.00326, "samples_ns": [ 29214415770, 43977336569, 51948233546 ], "samples_ts": [ 4.3814, 2.91059, 2.46399 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T18:58:41Z", "avg_ns": 57723258993, "stddev_ns": 3811866383, "avg_ts": 2.391182, "stddev_ts": 0.718134, "samples_ns": [ 81903781436, 46164213097, 45101782448 ], "samples_ts": [ 1.56281, 2.77271, 2.83803 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 588 }, { "timestamp_utc": "2025-12-10T19:16:29.590070+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:01:36Z\",\n \"avg_ns\": 49212678274,\n \"stddev_ns\": 3267083054,\n \"avg_ts\": 2.616072,\n \"stddev_ts\": 0.250167,\n \"samples_ns\": [ 44064253589, 51654617734, 51919163500 ],\n \"samples_ts\": [ 2.90485, 2.478, 2.46537 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:04:33Z\",\n \"avg_ns\": 238579252373,\n \"stddev_ns\": 3212791807,\n \"avg_ts\": 2.153672,\n \"stddev_ts\": 0.154041,\n \"samples_ns\": [ 226211693800, 230621747087, 258904316233 ],\n \"samples_ts\": [ 2.26337, 2.22009, 1.97756 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T19:01:36Z", "avg_ns": 49212678274, "stddev_ns": 3267083054, "avg_ts": 2.616072, "stddev_ts": 0.250167, "samples_ns": [ 44064253589, 51654617734, 51919163500 ], "samples_ts": [ 2.90485, 2.478, 2.46537 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T19:04:33Z", "avg_ns": 238579252373, "stddev_ns": 3212791807, "avg_ts": 2.153672, "stddev_ts": 0.154041, "samples_ns": [ 226211693800, 230621747087, 258904316233 ], "samples_ts": [ 2.26337, 2.22009, 1.97756 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 589 }, { "timestamp_utc": "2025-12-10T19:30:29.528816+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:16:30Z\",\n \"avg_ns\": 149789005317,\n \"stddev_ns\": 603031066,\n \"avg_ts\": 3.520857,\n \"stddev_ts\": 0.700974,\n \"samples_ns\": [ 136248598622, 187400975584, 125717441745 ],\n \"samples_ts\": [ 3.75784, 2.73211, 4.07263 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:26:56Z\",\n \"avg_ns\": 70769043092,\n \"stddev_ns\": 1389338108,\n \"avg_ts\": 1.830306,\n \"stddev_ts\": 0.236642,\n \"samples_ns\": [ 67298600436, 81730910700, 63277618140 ],\n \"samples_ts\": [ 1.90197, 1.56611, 2.02283 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T19:16:30Z", "avg_ns": 149789005317, "stddev_ns": 603031066, "avg_ts": 3.520857, "stddev_ts": 0.700974, "samples_ns": [ 136248598622, 187400975584, 125717441745 ], "samples_ts": [ 3.75784, 2.73211, 4.07263 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T19:26:56Z", "avg_ns": 70769043092, "stddev_ns": 1389338108, "avg_ts": 1.830306, "stddev_ts": 0.236642, "samples_ns": [ 67298600436, 81730910700, 63277618140 ], "samples_ts": [ 1.90197, 1.56611, 2.02283 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 590 }, { "timestamp_utc": "2025-12-10T19:51:55.389573+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:30:30Z\",\n \"avg_ns\": 163813370318,\n \"stddev_ns\": 2059099829,\n \"avg_ts\": 3.189356,\n \"stddev_ts\": 0.579789,\n \"samples_ns\": [ 182521519227, 132770816061, 176147775667 ],\n \"samples_ts\": [ 2.80515, 3.85627, 2.90665 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:40:47Z\",\n \"avg_ns\": 222321303646,\n \"stddev_ns\": 2538118437,\n \"avg_ts\": 2.313729,\n \"stddev_ts\": 0.192571,\n \"samples_ns\": [ 241743732413, 204561253574, 220658924952 ],\n \"samples_ts\": [ 2.11795, 2.50292, 2.32032 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T19:30:30Z", "avg_ns": 163813370318, "stddev_ns": 2059099829, "avg_ts": 3.189356, "stddev_ts": 0.579789, "samples_ns": [ 182521519227, 132770816061, 176147775667 ], "samples_ts": [ 2.80515, 3.85627, 2.90665 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T19:40:47Z", "avg_ns": 222321303646, "stddev_ns": 2538118437, "avg_ts": 2.313729, "stddev_ts": 0.192571, "samples_ns": [ 241743732413, 204561253574, 220658924952 ], "samples_ts": [ 2.11795, 2.50292, 2.32032 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 591 }, { "timestamp_utc": "2025-12-10T19:57:28.834171+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:51:56Z\",\n \"avg_ns\": 51576809610,\n \"stddev_ns\": 571299173,\n \"avg_ts\": 2.481940,\n \"stddev_ts\": 0.027627,\n \"samples_ns\": [ 52029800607, 51765623710, 50935004514 ],\n \"samples_ts\": [ 2.46013, 2.47268, 2.51301 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:55:07Z\",\n \"avg_ns\": 46865923492,\n \"stddev_ns\": 1224684492,\n \"avg_ts\": 2.739767,\n \"stddev_ts\": 0.184018,\n \"samples_ns\": [ 45022675682, 44928342564, 50646752230 ],\n \"samples_ts\": [ 2.84301, 2.84898, 2.52731 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T19:51:56Z", "avg_ns": 51576809610, "stddev_ns": 571299173, "avg_ts": 2.48194, "stddev_ts": 0.027627, "samples_ns": [ 52029800607, 51765623710, 50935004514 ], "samples_ts": [ 2.46013, 2.47268, 2.51301 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T19:55:07Z", "avg_ns": 46865923492, "stddev_ns": 1224684492, "avg_ts": 2.739767, "stddev_ts": 0.184018, "samples_ns": [ 45022675682, 44928342564, 50646752230 ], "samples_ts": [ 2.84301, 2.84898, 2.52731 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 592 }, { "timestamp_utc": "2025-12-10T20:12:16.122233+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:57:30Z\",\n \"avg_ns\": 44382629090,\n \"stddev_ns\": 4087784597,\n \"avg_ts\": 3.103966,\n \"stddev_ts\": 1.111681,\n \"samples_ns\": [ 52078747787, 51896111243, 29173028240 ],\n \"samples_ts\": [ 2.45782, 2.46647, 4.38761 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:00:35Z\",\n \"avg_ns\": 233362952181,\n \"stddev_ns\": 3655515556,\n \"avg_ts\": 2.198681,\n \"stddev_ts\": 0.122151,\n \"samples_ns\": [ 248824734463, 225343171539, 225920950541 ],\n \"samples_ts\": [ 2.05767, 2.27209, 2.26628 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T19:57:30Z", "avg_ns": 44382629090, "stddev_ns": 4087784597, "avg_ts": 3.103966, "stddev_ts": 1.111681, "samples_ns": [ 52078747787, 51896111243, 29173028240 ], "samples_ts": [ 2.45782, 2.46647, 4.38761 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T20:00:35Z", "avg_ns": 233362952181, "stddev_ns": 3655515556, "avg_ts": 2.198681, "stddev_ts": 0.122151, "samples_ns": [ 248824734463, 225343171539, 225920950541 ], "samples_ts": [ 2.05767, 2.27209, 2.26628 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 593 }, { "timestamp_utc": "2025-12-10T20:25:56.336223+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:12:17Z\",\n \"avg_ns\": 146193151665,\n \"stddev_ns\": 3589031991,\n \"avg_ts\": 3.610748,\n \"stddev_ts\": 0.723859,\n \"samples_ns\": [ 124268742022, 184030670494, 130280042481 ],\n \"samples_ts\": [ 4.1201, 2.78214, 3.93 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:22:46Z\",\n \"avg_ns\": 62966840090,\n \"stddev_ns\": 3802778440,\n \"avg_ts\": 2.169725,\n \"stddev_ts\": 0.725748,\n \"samples_ns\": [ 75972322989, 70319619631, 42608577652 ],\n \"samples_ts\": [ 1.68482, 1.82026, 3.00409 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T20:12:17Z", "avg_ns": 146193151665, "stddev_ns": 3589031991, "avg_ts": 3.610748, "stddev_ts": 0.723859, "samples_ns": [ 124268742022, 184030670494, 130280042481 ], "samples_ts": [ 4.1201, 2.78214, 3.93 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T20:22:46Z", "avg_ns": 62966840090, "stddev_ns": 3802778440, "avg_ts": 2.169725, "stddev_ts": 0.725748, "samples_ns": [ 75972322989, 70319619631, 42608577652 ], "samples_ts": [ 1.68482, 1.82026, 3.00409 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 594 }, { "timestamp_utc": "2025-12-10T20:47:30.923060+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:25:57Z\",\n \"avg_ns\": 156905142861,\n \"stddev_ns\": 456247848,\n \"avg_ts\": 3.264743,\n \"stddev_ts\": 0.088576,\n \"samples_ns\": [ 161848838287, 153863371623, 155003218673 ],\n \"samples_ts\": [ 3.16345, 3.32763, 3.30316 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:36:15Z\",\n \"avg_ns\": 225082723009,\n \"stddev_ns\": 4287975390,\n \"avg_ts\": 2.287652,\n \"stddev_ts\": 0.207735,\n \"samples_ns\": [ 248222872258, 219920609526, 207104687244 ],\n \"samples_ts\": [ 2.06266, 2.32811, 2.47218 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T20:25:57Z", "avg_ns": 156905142861, "stddev_ns": 456247848, "avg_ts": 3.264743, "stddev_ts": 0.088576, "samples_ns": [ 161848838287, 153863371623, 155003218673 ], "samples_ts": [ 3.16345, 3.32763, 3.30316 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T20:36:15Z", "avg_ns": 225082723009, "stddev_ns": 4287975390, "avg_ts": 2.287652, "stddev_ts": 0.207735, "samples_ns": [ 248222872258, 219920609526, 207104687244 ], "samples_ts": [ 2.06266, 2.32811, 2.47218 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 595 }, { "timestamp_utc": "2025-12-10T20:53:20.631938+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:47:32Z\",\n \"avg_ns\": 44015831319,\n \"stddev_ns\": 2594725765,\n \"avg_ts\": 3.118851,\n \"stddev_ts\": 1.088440,\n \"samples_ns\": [ 51682043678, 51112126653, 29253323627 ],\n \"samples_ts\": [ 2.47668, 2.5043, 4.37557 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:50:36Z\",\n \"avg_ns\": 54585728643,\n \"stddev_ns\": 1297190884,\n \"avg_ts\": 2.471782,\n \"stddev_ts\": 0.633060,\n \"samples_ns\": [ 45092966712, 45134359917, 73529859302 ],\n \"samples_ts\": [ 2.83858, 2.83598, 1.74079 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T20:47:32Z", "avg_ns": 44015831319, "stddev_ns": 2594725765, "avg_ts": 3.118851, "stddev_ts": 1.08844, "samples_ns": [ 51682043678, 51112126653, 29253323627 ], "samples_ts": [ 2.47668, 2.5043, 4.37557 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T20:50:36Z", "avg_ns": 54585728643, "stddev_ns": 1297190884, "avg_ts": 2.471782, "stddev_ts": 0.63306, "samples_ns": [ 45092966712, 45134359917, 73529859302 ], "samples_ts": [ 2.83858, 2.83598, 1.74079 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 596 }, { "timestamp_utc": "2025-12-10T21:08:11.444036+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:53:22Z\",\n \"avg_ns\": 36878159235,\n \"stddev_ns\": 2276181995,\n \"avg_ts\": 3.730752,\n \"stddev_ts\": 1.098630,\n \"samples_ns\": [ 51986716276, 29305737263, 29342024168 ],\n \"samples_ts\": [ 2.46217, 4.36775, 4.36234 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:56:04Z\",\n \"avg_ns\": 242073142030,\n \"stddev_ns\": 1277533721,\n \"avg_ts\": 2.121276,\n \"stddev_ts\": 0.140028,\n \"samples_ns\": [ 259012316803, 240288222021, 226918887266 ],\n \"samples_ts\": [ 1.97674, 2.13077, 2.25631 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T20:53:22Z", "avg_ns": 36878159235, "stddev_ns": 2276181995, "avg_ts": 3.730752, "stddev_ts": 1.09863, "samples_ns": [ 51986716276, 29305737263, 29342024168 ], "samples_ts": [ 2.46217, 4.36775, 4.36234 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T20:56:04Z", "avg_ns": 242073142030, "stddev_ns": 1277533721, "avg_ts": 2.121276, "stddev_ts": 0.140028, "samples_ns": [ 259012316803, 240288222021, 226918887266 ], "samples_ts": [ 1.97674, 2.13077, 2.25631 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 597 }, { "timestamp_utc": "2025-12-10T21:21:39.457648+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:08:12Z\",\n \"avg_ns\": 156394030835,\n \"stddev_ns\": 3845706789,\n \"avg_ts\": 3.275966,\n \"stddev_ts\": 0.104528,\n \"samples_ns\": [ 150738812853, 159385086363, 159058193290 ],\n \"samples_ts\": [ 3.3966, 3.21235, 3.21895 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:18:49Z\",\n \"avg_ns\": 56256039890,\n \"stddev_ns\": 1438092158,\n \"avg_ts\": 2.434151,\n \"stddev_ts\": 0.695875,\n \"samples_ns\": [ 78497150178, 45083040941, 45187928552 ],\n \"samples_ts\": [ 1.63063, 2.83921, 2.83261 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T21:08:12Z", "avg_ns": 156394030835, "stddev_ns": 3845706789, "avg_ts": 3.275966, "stddev_ts": 0.104528, "samples_ns": [ 150738812853, 159385086363, 159058193290 ], "samples_ts": [ 3.3966, 3.21235, 3.21895 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T21:18:49Z", "avg_ns": 56256039890, "stddev_ns": 1438092158, "avg_ts": 2.434151, "stddev_ts": 0.695875, "samples_ns": [ 78497150178, 45083040941, 45187928552 ], "samples_ts": [ 1.63063, 2.83921, 2.83261 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 598 }, { "timestamp_utc": "2025-12-10T21:44:51.855137+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:21:40Z\",\n \"avg_ns\": 146292042188,\n \"stddev_ns\": 4200805683,\n \"avg_ts\": 3.644029,\n \"stddev_ts\": 0.832975,\n \"samples_ns\": [ 128603602431, 189958794403, 120313729730 ],\n \"samples_ts\": [ 3.98123, 2.69532, 4.25554 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:32:01Z\",\n \"avg_ns\": 256703265651,\n \"stddev_ns\": 4072998301,\n \"avg_ts\": 1.994859,\n \"stddev_ts\": 0.031929,\n \"samples_ns\": [ 259481899536, 258600087851, 252027809566 ],\n \"samples_ts\": [ 1.97316, 1.97989, 2.03152 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T21:21:40Z", "avg_ns": 146292042188, "stddev_ns": 4200805683, "avg_ts": 3.644029, "stddev_ts": 0.832975, "samples_ns": [ 128603602431, 189958794403, 120313729730 ], "samples_ts": [ 3.98123, 2.69532, 4.25554 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T21:32:01Z", "avg_ns": 256703265651, "stddev_ns": 4072998301, "avg_ts": 1.994859, "stddev_ts": 0.031929, "samples_ns": [ 259481899536, 258600087851, 252027809566 ], "samples_ts": [ 1.97316, 1.97989, 2.03152 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 599 }, { "timestamp_utc": "2025-12-10T21:50:19.618254+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:44:53Z\",\n \"avg_ns\": 29268764578,\n \"stddev_ns\": 8699711,\n \"avg_ts\": 4.373263,\n \"stddev_ts\": 0.001300,\n \"samples_ns\": [ 29277275106, 29269131345, 29259887283 ],\n \"samples_ts\": [ 4.37199, 4.37321, 4.37459 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:46:56Z\",\n \"avg_ns\": 67631260405,\n \"stddev_ns\": 3723269429,\n \"avg_ts\": 1.908893,\n \"stddev_ts\": 0.213613,\n \"samples_ns\": [ 60645345581, 75959229811, 66289205823 ],\n \"samples_ts\": [ 2.11063, 1.68511, 1.93093 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T21:44:53Z", "avg_ns": 29268764578, "stddev_ns": 8699711, "avg_ts": 4.373263, "stddev_ts": 0.0013, "samples_ns": [ 29277275106, 29269131345, 29259887283 ], "samples_ts": [ 4.37199, 4.37321, 4.37459 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T21:46:56Z", "avg_ns": 67631260405, "stddev_ns": 3723269429, "avg_ts": 1.908893, "stddev_ts": 0.213613, "samples_ns": [ 60645345581, 75959229811, 66289205823 ], "samples_ts": [ 2.11063, 1.68511, 1.93093 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 600 }, { "timestamp_utc": "2025-12-10T22:05:11.831967+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:50:20Z\",\n \"avg_ns\": 29638684699,\n \"stddev_ns\": 696280740,\n \"avg_ts\": 4.320248,\n \"stddev_ts\": 0.100134,\n \"samples_ns\": [ 29237882174, 29235492662, 30442679261 ],\n \"samples_ts\": [ 4.37788, 4.37824, 4.20462 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:52:18Z\",\n \"avg_ns\": 257462759525,\n \"stddev_ns\": 533303273,\n \"avg_ts\": 1.988643,\n \"stddev_ts\": 0.004124,\n \"samples_ns\": [ 256847652662, 257795705557, 257744920357 ],\n \"samples_ts\": [ 1.9934, 1.98607, 1.98646 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T21:50:20Z", "avg_ns": 29638684699, "stddev_ns": 696280740, "avg_ts": 4.320248, "stddev_ts": 0.100134, "samples_ns": [ 29237882174, 29235492662, 30442679261 ], "samples_ts": [ 4.37788, 4.37824, 4.20462 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T21:52:18Z", "avg_ns": 257462759525, "stddev_ns": 533303273, "avg_ts": 1.988643, "stddev_ts": 0.004124, "samples_ns": [ 256847652662, 257795705557, 257744920357 ], "samples_ts": [ 1.9934, 1.98607, 1.98646 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 601 }, { "timestamp_utc": "2025-12-10T22:17:54.170965+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:05:12Z\",\n \"avg_ns\": 166730291752,\n \"stddev_ns\": 3841825596,\n \"avg_ts\": 3.225327,\n \"stddev_ts\": 0.933934,\n \"samples_ns\": [ 191644088242, 118969893156, 189576893858 ],\n \"samples_ts\": [ 2.67162, 4.30361, 2.70075 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:15:29Z\",\n \"avg_ns\": 47946206740,\n \"stddev_ns\": 3309664874,\n \"avg_ts\": 2.736432,\n \"stddev_ts\": 0.495415,\n \"samples_ns\": [ 42387149409, 42312240822, 59139229989 ],\n \"samples_ts\": [ 3.01978, 3.02513, 2.16438 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T22:05:12Z", "avg_ns": 166730291752, "stddev_ns": 3841825596, "avg_ts": 3.225327, "stddev_ts": 0.933934, "samples_ns": [ 191644088242, 118969893156, 189576893858 ], "samples_ts": [ 2.67162, 4.30361, 2.70075 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T22:15:29Z", "avg_ns": 47946206740, "stddev_ns": 3309664874, "avg_ts": 2.736432, "stddev_ts": 0.495415, "samples_ns": [ 42387149409, 42312240822, 59139229989 ], "samples_ts": [ 3.01978, 3.02513, 2.16438 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 602 }, { "timestamp_utc": "2025-12-10T22:41:01.038697+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:17:55Z\",\n \"avg_ns\": 151466350449,\n \"stddev_ns\": 2300202971,\n \"avg_ts\": 3.400024,\n \"stddev_ts\": 0.310935,\n \"samples_ns\": [ 140196441990, 167728956859, 146473652499 ],\n \"samples_ts\": [ 3.65202, 3.05254, 3.49551 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:28:23Z\",\n \"avg_ns\": 252139089787,\n \"stddev_ns\": 2434399118,\n \"avg_ts\": 2.037113,\n \"stddev_ts\": 0.143633,\n \"samples_ns\": [ 232417082300, 261635885542, 262364301520 ],\n \"samples_ts\": [ 2.20294, 1.95692, 1.95149 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T22:17:55Z", "avg_ns": 151466350449, "stddev_ns": 2300202971, "avg_ts": 3.400024, "stddev_ts": 0.310935, "samples_ns": [ 140196441990, 167728956859, 146473652499 ], "samples_ts": [ 3.65202, 3.05254, 3.49551 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T22:28:23Z", "avg_ns": 252139089787, "stddev_ns": 2434399118, "avg_ts": 2.037113, "stddev_ts": 0.143633, "samples_ns": [ 232417082300, 261635885542, 262364301520 ], "samples_ts": [ 2.20294, 1.95692, 1.95149 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 603 }, { "timestamp_utc": "2025-12-10T22:46:22.632822+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:41:01Z\",\n \"avg_ns\": 40292332239,\n \"stddev_ns\": 2679153091,\n \"avg_ts\": 3.353264,\n \"stddev_ts\": 0.958728,\n \"samples_ns\": [ 29266396821, 39813367131, 51797232765 ],\n \"samples_ts\": [ 4.37362, 3.215, 2.47117 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:43:31Z\",\n \"avg_ns\": 56653175669,\n \"stddev_ns\": 3763480566,\n \"avg_ts\": 2.396228,\n \"stddev_ts\": 0.666949,\n \"samples_ns\": [ 76444083472, 50701282001, 42814161536 ],\n \"samples_ts\": [ 1.67443, 2.52459, 2.98966 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T22:41:01Z", "avg_ns": 40292332239, "stddev_ns": 2679153091, "avg_ts": 3.353264, "stddev_ts": 0.958728, "samples_ns": [ 29266396821, 39813367131, 51797232765 ], "samples_ts": [ 4.37362, 3.215, 2.47117 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T22:43:31Z", "avg_ns": 56653175669, "stddev_ns": 3763480566, "avg_ts": 2.396228, "stddev_ts": 0.666949, "samples_ns": [ 76444083472, 50701282001, 42814161536 ], "samples_ts": [ 1.67443, 2.52459, 2.98966 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 604 }, { "timestamp_utc": "2025-12-10T23:01:15.988097+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:46:23Z\",\n \"avg_ns\": 45734365955,\n \"stddev_ns\": 2374029756,\n \"avg_ts\": 2.913136,\n \"stddev_ts\": 0.758150,\n \"samples_ns\": [ 33786183026, 51862740315, 51554174524 ],\n \"samples_ts\": [ 3.78853, 2.46805, 2.48283 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:49:10Z\",\n \"avg_ns\": 241745837397,\n \"stddev_ns\": 1914163138,\n \"avg_ts\": 2.124012,\n \"stddev_ts\": 0.138914,\n \"samples_ns\": [ 226494542375, 240525945601, 258217024215 ],\n \"samples_ts\": [ 2.26054, 2.12867, 1.98283 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T22:46:23Z", "avg_ns": 45734365955, "stddev_ns": 2374029756, "avg_ts": 2.913136, "stddev_ts": 0.75815, "samples_ns": [ 33786183026, 51862740315, 51554174524 ], "samples_ts": [ 3.78853, 2.46805, 2.48283 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T22:49:10Z", "avg_ns": 241745837397, "stddev_ns": 1914163138, "avg_ts": 2.124012, "stddev_ts": 0.138914, "samples_ns": [ 226494542375, 240525945601, 258217024215 ], "samples_ts": [ 2.26054, 2.12867, 1.98283 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 605 }, { "timestamp_utc": "2025-12-10T23:15:08.915871+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:01:16Z\",\n \"avg_ns\": 153443890056,\n \"stddev_ns\": 2278706953,\n \"avg_ts\": 3.373216,\n \"stddev_ts\": 0.419651,\n \"samples_ns\": [ 147285734139, 175845373134, 137200562896 ],\n \"samples_ts\": [ 3.47624, 2.91165, 3.73176 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:11:43Z\",\n \"avg_ns\": 68480699706,\n \"stddev_ns\": 1039716377,\n \"avg_ts\": 1.932741,\n \"stddev_ts\": 0.457510,\n \"samples_ns\": [ 52020928665, 77606212249, 75814958206 ],\n \"samples_ts\": [ 2.46055, 1.64935, 1.68832 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T23:01:16Z", "avg_ns": 153443890056, "stddev_ns": 2278706953, "avg_ts": 3.373216, "stddev_ts": 0.419651, "samples_ns": [ 147285734139, 175845373134, 137200562896 ], "samples_ts": [ 3.47624, 2.91165, 3.73176 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T23:11:43Z", "avg_ns": 68480699706, "stddev_ns": 1039716377, "avg_ts": 1.932741, "stddev_ts": 0.45751, "samples_ns": [ 52020928665, 77606212249, 75814958206 ], "samples_ts": [ 2.46055, 1.64935, 1.68832 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 606 }, { "timestamp_utc": "2025-12-10T23:37:07.777061+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:15:09Z\",\n \"avg_ns\": 166971417963,\n \"stddev_ns\": 2140886876,\n \"avg_ts\": 3.231523,\n \"stddev_ts\": 0.969024,\n \"samples_ns\": [ 191702626459, 117688836826, 191522790604 ],\n \"samples_ts\": [ 2.6708, 4.35046, 2.67331 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:25:28Z\",\n \"avg_ns\": 233007694525,\n \"stddev_ns\": 1317869555,\n \"avg_ts\": 2.199129,\n \"stddev_ts\": 0.076294,\n \"samples_ns\": [ 241699038079, 225556254827, 231767790671 ],\n \"samples_ts\": [ 2.11834, 2.26994, 2.20911 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T23:15:09Z", "avg_ns": 166971417963, "stddev_ns": 2140886876, "avg_ts": 3.231523, "stddev_ts": 0.969024, "samples_ns": [ 191702626459, 117688836826, 191522790604 ], "samples_ts": [ 2.6708, 4.35046, 2.67331 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T23:25:28Z", "avg_ns": 233007694525, "stddev_ns": 1317869555, "avg_ts": 2.199129, "stddev_ts": 0.076294, "samples_ns": [ 241699038079, 225556254827, 231767790671 ], "samples_ts": [ 2.11834, 2.26994, 2.20911 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 607 }, { "timestamp_utc": "2025-12-10T23:42:39.534024+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:37:08Z\",\n \"avg_ns\": 48450662681,\n \"stddev_ns\": 4077821356,\n \"avg_ts\": 2.670484,\n \"stddev_ts\": 0.351217,\n \"samples_ns\": [ 51887711487, 51852235218, 41612041339 ],\n \"samples_ts\": [ 2.46687, 2.46855, 3.07603 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:40:19Z\",\n \"avg_ns\": 46354731181,\n \"stddev_ns\": 4121836556,\n \"avg_ts\": 2.789871,\n \"stddev_ts\": 0.333547,\n \"samples_ns\": [ 42885197922, 42950622954, 53228372669 ],\n \"samples_ts\": [ 2.98471, 2.98017, 2.40473 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T23:37:08Z", "avg_ns": 48450662681, "stddev_ns": 4077821356, "avg_ts": 2.670484, "stddev_ts": 0.351217, "samples_ns": [ 51887711487, 51852235218, 41612041339 ], "samples_ts": [ 2.46687, 2.46855, 3.07603 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-10T23:40:19Z", "avg_ns": 46354731181, "stddev_ns": 4121836556, "avg_ts": 2.789871, "stddev_ts": 0.333547, "samples_ns": [ 42885197922, 42950622954, 53228372669 ], "samples_ts": [ 2.98471, 2.98017, 2.40473 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 608 }, { "timestamp_utc": "2025-12-10T23:57:26.366842+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:42:40Z\",\n \"avg_ns\": 42915060883,\n \"stddev_ns\": 3917026436,\n \"avg_ts\": 3.177057,\n \"stddev_ts\": 1.043922,\n \"samples_ns\": [ 51856360223, 47637353026, 29251469401 ],\n \"samples_ts\": [ 2.46836, 2.68697, 4.37585 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:45:41Z\",\n \"avg_ns\": 234820175314,\n \"stddev_ns\": 4280831922,\n \"avg_ts\": 2.186727,\n \"stddev_ts\": 0.141444,\n \"samples_ns\": [ 253034039252, 226031351429, 225395135263 ],\n \"samples_ts\": [ 2.02344, 2.26517, 2.27157 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T23:42:40Z", "avg_ns": 42915060883, "stddev_ns": 3917026436, "avg_ts": 3.177057, "stddev_ts": 1.043922, "samples_ns": [ 51856360223, 47637353026, 29251469401 ], "samples_ts": [ 2.46836, 2.68697, 4.37585 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-10T23:45:41Z", "avg_ns": 234820175314, "stddev_ns": 4280831922, "avg_ts": 2.186727, "stddev_ts": 0.141444, "samples_ns": [ 253034039252, 226031351429, 225395135263 ], "samples_ts": [ 2.02344, 2.26517, 2.27157 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 609 }, { "timestamp_utc": "2025-12-11T00:11:08.964789+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:57:27Z\",\n \"avg_ns\": 149695354188,\n \"stddev_ns\": 2367838445,\n \"avg_ts\": 3.483364,\n \"stddev_ts\": 0.551906,\n \"samples_ns\": [ 130863439751, 178972965979, 139249656835 ],\n \"samples_ts\": [ 3.91248, 2.86077, 3.67685 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:08:04Z\",\n \"avg_ns\": 61261781330,\n \"stddev_ns\": 534327089,\n \"avg_ts\": 2.209763,\n \"stddev_ts\": 0.660813,\n \"samples_ns\": [ 77192849859, 63087101483, 43505392648 ],\n \"samples_ts\": [ 1.65818, 2.02894, 2.94216 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-10T23:57:27Z", "avg_ns": 149695354188, "stddev_ns": 2367838445, "avg_ts": 3.483364, "stddev_ts": 0.551906, "samples_ns": [ 130863439751, 178972965979, 139249656835 ], "samples_ts": [ 3.91248, 2.86077, 3.67685 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T00:08:04Z", "avg_ns": 61261781330, "stddev_ns": 534327089, "avg_ts": 2.209763, "stddev_ts": 0.660813, "samples_ns": [ 77192849859, 63087101483, 43505392648 ], "samples_ts": [ 1.65818, 2.02894, 2.94216 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 610 }, { "timestamp_utc": "2025-12-11T00:34:07.028048+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:11:09Z\",\n \"avg_ns\": 154855673799,\n \"stddev_ns\": 4113881790,\n \"avg_ts\": 3.322036,\n \"stddev_ts\": 0.277531,\n \"samples_ns\": [ 152530539855, 169043856374, 142992625170 ],\n \"samples_ts\": [ 3.3567, 3.0288, 3.5806 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:21:33Z\",\n \"avg_ns\": 251164373508,\n \"stddev_ns\": 983914033,\n \"avg_ts\": 2.043477,\n \"stddev_ts\": 0.125603,\n \"samples_ns\": [ 259323210346, 260215227423, 233954682756 ],\n \"samples_ts\": [ 1.97437, 1.9676, 2.18846 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T00:11:09Z", "avg_ns": 154855673799, "stddev_ns": 4113881790, "avg_ts": 3.322036, "stddev_ts": 0.277531, "samples_ns": [ 152530539855, 169043856374, 142992625170 ], "samples_ts": [ 3.3567, 3.0288, 3.5806 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T00:21:33Z", "avg_ns": 251164373508, "stddev_ns": 983914033, "avg_ts": 2.043477, "stddev_ts": 0.125603, "samples_ns": [ 259323210346, 260215227423, 233954682756 ], "samples_ts": [ 1.97437, 1.9676, 2.18846 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 611 }, { "timestamp_utc": "2025-12-11T00:39:18.395516+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:34:08Z\",\n \"avg_ns\": 16464739921,\n \"stddev_ns\": 219170040,\n \"avg_ts\": 7.937715,\n \"stddev_ts\": 1.326354,\n \"samples_ns\": [ 19980658291, 14718865459, 14694696014 ],\n \"samples_ts\": [ 6.4062, 8.69632, 8.71063 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:35:48Z\",\n \"avg_ns\": 69732962297,\n \"stddev_ns\": 3843950777,\n \"avg_ts\": 2.502547,\n \"stddev_ts\": 1.897251,\n \"samples_ns\": [ 27272906788, 90812433184, 91113546921 ],\n \"samples_ts\": [ 4.6933, 1.4095, 1.40484 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T00:34:08Z", "avg_ns": 16464739921, "stddev_ns": 219170040, "avg_ts": 7.937715, "stddev_ts": 1.326354, "samples_ns": [ 19980658291, 14718865459, 14694696014 ], "samples_ts": [ 6.4062, 8.69632, 8.71063 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T00:35:48Z", "avg_ns": 69732962297, "stddev_ns": 3843950777, "avg_ts": 2.502547, "stddev_ts": 1.897251, "samples_ns": [ 27272906788, 90812433184, 91113546921 ], "samples_ts": [ 4.6933, 1.4095, 1.40484 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 612 }, { "timestamp_utc": "2025-12-11T00:51:21.442589+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:39:19Z\",\n \"avg_ns\": 14736955161,\n \"stddev_ns\": 45936907,\n \"avg_ts\": 8.685704,\n \"stddev_ts\": 0.027031,\n \"samples_ns\": [ 14703342454, 14718225070, 14789297959 ],\n \"samples_ts\": [ 8.7055, 8.6967, 8.65491 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:40:52Z\",\n \"avg_ns\": 209635713941,\n \"stddev_ns\": 2097757675,\n \"avg_ts\": 2.532357,\n \"stddev_ts\": 0.568652,\n \"samples_ns\": [ 264909242968, 196891414736, 167106484119 ],\n \"samples_ts\": [ 1.93274, 2.60042, 3.06391 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T00:39:19Z", "avg_ns": 14736955161, "stddev_ns": 45936907, "avg_ts": 8.685704, "stddev_ts": 0.027031, "samples_ns": [ 14703342454, 14718225070, 14789297959 ], "samples_ts": [ 8.7055, 8.6967, 8.65491 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T00:40:52Z", "avg_ns": 209635713941, "stddev_ns": 2097757675, "avg_ts": 2.532357, "stddev_ts": 0.568652, "samples_ns": [ 264909242968, 196891414736, 167106484119 ], "samples_ts": [ 1.93274, 2.60042, 3.06391 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 613 }, { "timestamp_utc": "2025-12-11T01:04:06.339618+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:51:23Z\",\n \"avg_ns\": 119362696041,\n \"stddev_ns\": 2857858724,\n \"avg_ts\": 4.865382,\n \"stddev_ts\": 2.048860,\n \"samples_ns\": [ 73117982937, 175473512720, 109496592467 ],\n \"samples_ts\": [ 7.00238, 2.91782, 4.67594 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:00:45Z\",\n \"avg_ns\": 66869353145,\n \"stddev_ns\": 2321121207,\n \"avg_ts\": 2.684874,\n \"stddev_ts\": 2.122672,\n \"samples_ns\": [ 24927669697, 84068647581, 91611742159 ],\n \"samples_ts\": [ 5.13486, 1.52257, 1.3972 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T00:51:23Z", "avg_ns": 119362696041, "stddev_ns": 2857858724, "avg_ts": 4.865382, "stddev_ts": 2.04886, "samples_ns": [ 73117982937, 175473512720, 109496592467 ], "samples_ts": [ 7.00238, 2.91782, 4.67594 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T01:00:45Z", "avg_ns": 66869353145, "stddev_ns": 2321121207, "avg_ts": 2.684874, "stddev_ts": 2.122672, "samples_ns": [ 24927669697, 84068647581, 91611742159 ], "samples_ts": [ 5.13486, 1.52257, 1.3972 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 614 }, { "timestamp_utc": "2025-12-11T01:24:54.327806+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:04:07Z\",\n \"avg_ns\": 131895545198,\n \"stddev_ns\": 919386378,\n \"avg_ts\": 3.939954,\n \"stddev_ts\": 0.606200,\n \"samples_ns\": [ 147310484601, 137691464443, 110684686551 ],\n \"samples_ts\": [ 3.47565, 3.71846, 4.62575 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:12:24Z\",\n \"avg_ns\": 249563575523,\n \"stddev_ns\": 3413817620,\n \"avg_ts\": 2.071453,\n \"stddev_ts\": 0.256504,\n \"samples_ns\": [ 216468730885, 270617224407, 261604771277 ],\n \"samples_ts\": [ 2.36524, 1.89197, 1.95715 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T01:04:07Z", "avg_ns": 131895545198, "stddev_ns": 919386378, "avg_ts": 3.939954, "stddev_ts": 0.6062, "samples_ns": [ 147310484601, 137691464443, 110684686551 ], "samples_ts": [ 3.47565, 3.71846, 4.62575 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T01:12:24Z", "avg_ns": 249563575523, "stddev_ns": 3413817620, "avg_ts": 2.071453, "stddev_ts": 0.256504, "samples_ns": [ 216468730885, 270617224407, 261604771277 ], "samples_ts": [ 2.36524, 1.89197, 1.95715 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 615 }, { "timestamp_utc": "2025-12-11T01:30:06.808809+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:24:55Z\",\n \"avg_ns\": 14723585338,\n \"stddev_ns\": 30530825,\n \"avg_ts\": 8.693560,\n \"stddev_ts\": 0.018041,\n \"samples_ns\": [ 14749022313, 14732005019, 14689728684 ],\n \"samples_ts\": [ 8.67854, 8.68857, 8.71357 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:26:02Z\",\n \"avg_ns\": 81284049515,\n \"stddev_ns\": 3945289792,\n \"avg_ts\": 1.614488,\n \"stddev_ts\": 0.328001,\n \"samples_ns\": [ 64217916318, 90016664466, 89617567762 ],\n \"samples_ts\": [ 1.99321, 1.42196, 1.42829 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T01:24:55Z", "avg_ns": 14723585338, "stddev_ns": 30530825, "avg_ts": 8.69356, "stddev_ts": 0.018041, "samples_ns": [ 14749022313, 14732005019, 14689728684 ], "samples_ts": [ 8.67854, 8.68857, 8.71357 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T01:26:02Z", "avg_ns": 81284049515, "stddev_ns": 3945289792, "avg_ts": 1.614488, "stddev_ts": 0.328001, "samples_ns": [ 64217916318, 90016664466, 89617567762 ], "samples_ts": [ 1.99321, 1.42196, 1.42829 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 616 }, { "timestamp_utc": "2025-12-11T01:42:16.001756+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:30:07Z\",\n \"avg_ns\": 14723689268,\n \"stddev_ns\": 17979659,\n \"avg_ts\": 8.693482,\n \"stddev_ts\": 0.010608,\n \"samples_ns\": [ 14712414619, 14744422964, 14714230223 ],\n \"samples_ts\": [ 8.70014, 8.68125, 8.69906 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:31:06Z\",\n \"avg_ns\": 223004230552,\n \"stddev_ns\": 1430551399,\n \"avg_ts\": 2.388407,\n \"stddev_ts\": 0.608742,\n \"samples_ns\": [ 265106558287, 237640359813, 166265773558 ],\n \"samples_ts\": [ 1.9313, 2.15452, 3.07941 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T01:30:07Z", "avg_ns": 14723689268, "stddev_ns": 17979659, "avg_ts": 8.693482, "stddev_ts": 0.010608, "samples_ns": [ 14712414619, 14744422964, 14714230223 ], "samples_ts": [ 8.70014, 8.68125, 8.69906 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T01:31:06Z", "avg_ns": 223004230552, "stddev_ns": 1430551399, "avg_ts": 2.388407, "stddev_ts": 0.608742, "samples_ns": [ 265106558287, 237640359813, 166265773558 ], "samples_ts": [ 1.9313, 2.15452, 3.07941 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 617 }, { "timestamp_utc": "2025-12-11T01:54:54.249764+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:42:17Z\",\n \"avg_ns\": 114381696280,\n \"stddev_ns\": 4086253766,\n \"avg_ts\": 5.683132,\n \"stddev_ts\": 2.755106,\n \"samples_ns\": [ 66612690190, 201480988412, 75051410240 ],\n \"samples_ts\": [ 7.68622, 2.54118, 6.82199 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:50:59Z\",\n \"avg_ns\": 78149256775,\n \"stddev_ns\": 1697208764,\n \"avg_ts\": 1.726206,\n \"stddev_ts\": 0.517964,\n \"samples_ns\": [ 55070452393, 89798687237, 89578630697 ],\n \"samples_ts\": [ 2.3243, 1.42541, 1.42891 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T01:42:17Z", "avg_ns": 114381696280, "stddev_ns": 4086253766, "avg_ts": 5.683132, "stddev_ts": 2.755106, "samples_ns": [ 66612690190, 201480988412, 75051410240 ], "samples_ts": [ 7.68622, 2.54118, 6.82199 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T01:50:59Z", "avg_ns": 78149256775, "stddev_ns": 1697208764, "avg_ts": 1.726206, "stddev_ts": 0.517964, "samples_ns": [ 55070452393, 89798687237, 89578630697 ], "samples_ts": [ 2.3243, 1.42541, 1.42891 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 618 }, { "timestamp_utc": "2025-12-11T02:15:10.421049+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:54:55Z\",\n \"avg_ns\": 141542091146,\n \"stddev_ns\": 4235084661,\n \"avg_ts\": 3.791607,\n \"stddev_ts\": 1.013341,\n \"samples_ns\": [ 178969764188, 105110319433, 140546189817 ],\n \"samples_ts\": [ 2.86082, 4.87107, 3.64293 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:03:07Z\",\n \"avg_ns\": 240704846051,\n \"stddev_ns\": 1875493850,\n \"avg_ts\": 2.191489,\n \"stddev_ts\": 0.488798,\n \"samples_ns\": [ 185788547135, 266982440144, 269343550875 ],\n \"samples_ts\": [ 2.75582, 1.91773, 1.90092 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T01:54:55Z", "avg_ns": 141542091146, "stddev_ns": 4235084661, "avg_ts": 3.791607, "stddev_ts": 1.013341, "samples_ns": [ 178969764188, 105110319433, 140546189817 ], "samples_ts": [ 2.86082, 4.87107, 3.64293 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T02:03:07Z", "avg_ns": 240704846051, "stddev_ns": 1875493850, "avg_ts": 2.191489, "stddev_ts": 0.488798, "samples_ns": [ 185788547135, 266982440144, 269343550875 ], "samples_ts": [ 2.75582, 1.91773, 1.90092 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 619 }, { "timestamp_utc": "2025-12-11T02:20:14.706080+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:15:11Z\",\n \"avg_ns\": 14991751883,\n \"stddev_ns\": 513300969,\n \"avg_ts\": 8.544577,\n \"stddev_ts\": 0.286886,\n \"samples_ns\": [ 14693616370, 14697182083, 15584457197 ],\n \"samples_ts\": [ 8.71127, 8.70915, 8.21331 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:16:10Z\",\n \"avg_ns\": 81128844281,\n \"stddev_ns\": 1587843136,\n \"avg_ts\": 1.597029,\n \"stddev_ts\": 0.220999,\n \"samples_ns\": [ 83880374687, 90122433956, 69383724200 ],\n \"samples_ts\": [ 1.52598, 1.42029, 1.84481 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T02:15:11Z", "avg_ns": 14991751883, "stddev_ns": 513300969, "avg_ts": 8.544577, "stddev_ts": 0.286886, "samples_ns": [ 14693616370, 14697182083, 15584457197 ], "samples_ts": [ 8.71127, 8.70915, 8.21331 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T02:16:10Z", "avg_ns": 81128844281, "stddev_ns": 1587843136, "avg_ts": 1.597029, "stddev_ts": 0.220999, "samples_ns": [ 83880374687, 90122433956, 69383724200 ], "samples_ts": [ 1.52598, 1.42029, 1.84481 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 620 }, { "timestamp_utc": "2025-12-11T02:32:46.508316+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:20:15Z\",\n \"avg_ns\": 16766090024,\n \"stddev_ns\": 3468245846,\n \"avg_ts\": 7.834104,\n \"stddev_ts\": 1.447690,\n \"samples_ns\": [ 14748958802, 14778472150, 20770839120 ],\n \"samples_ts\": [ 8.67858, 8.66125, 6.16249 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:21:20Z\",\n \"avg_ns\": 228303093671,\n \"stddev_ns\": 3827415819,\n \"avg_ts\": 2.338644,\n \"stddev_ts\": 0.623407,\n \"samples_ns\": [ 258577974580, 258928459206, 167402847229 ],\n \"samples_ts\": [ 1.98006, 1.97738, 3.05849 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T02:20:15Z", "avg_ns": 16766090024, "stddev_ns": 3468245846, "avg_ts": 7.834104, "stddev_ts": 1.44769, "samples_ns": [ 14748958802, 14778472150, 20770839120 ], "samples_ts": [ 8.67858, 8.66125, 6.16249 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T02:21:20Z", "avg_ns": 228303093671, "stddev_ns": 3827415819, "avg_ts": 2.338644, "stddev_ts": 0.623407, "samples_ns": [ 258577974580, 258928459206, 167402847229 ], "samples_ts": [ 1.98006, 1.97738, 3.05849 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 621 }, { "timestamp_utc": "2025-12-11T02:45:14.988320+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:32:48Z\",\n \"avg_ns\": 114827209997,\n \"stddev_ns\": 4153298739,\n \"avg_ts\": 5.616477,\n \"stddev_ts\": 2.947104,\n \"samples_ns\": [ 89971785527, 194456707565, 60053136901 ],\n \"samples_ts\": [ 5.69067, 2.63298, 8.52578 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:41:10Z\",\n \"avg_ns\": 81487742487,\n \"stddev_ns\": 4264635006,\n \"avg_ts\": 1.593918,\n \"stddev_ts\": 0.241182,\n \"samples_ns\": [ 83708370715, 91970208709, 68784648037 ],\n \"samples_ts\": [ 1.52912, 1.39176, 1.86088 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T02:32:48Z", "avg_ns": 114827209997, "stddev_ns": 4153298739, "avg_ts": 5.616477, "stddev_ts": 2.947104, "samples_ns": [ 89971785527, 194456707565, 60053136901 ], "samples_ts": [ 5.69067, 2.63298, 8.52578 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T02:41:10Z", "avg_ns": 81487742487, "stddev_ns": 4264635006, "avg_ts": 1.593918, "stddev_ts": 0.241182, "samples_ns": [ 83708370715, 91970208709, 68784648037 ], "samples_ts": [ 1.52912, 1.39176, 1.86088 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 622 }, { "timestamp_utc": "2025-12-11T03:05:24.271059+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:45:15Z\",\n \"avg_ns\": 149869546011,\n \"stddev_ns\": 3814843656,\n \"avg_ts\": 4.098589,\n \"stddev_ts\": 2.338995,\n \"samples_ns\": [ 201971737539, 75432199784, 172204700711 ],\n \"samples_ts\": [ 2.53501, 6.78755, 2.97321 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:53:52Z\",\n \"avg_ns\": 230475340571,\n \"stddev_ns\": 3854817231,\n \"avg_ts\": 2.274775,\n \"stddev_ts\": 0.437931,\n \"samples_ns\": [ 185857054035, 235677367574, 269891600104 ],\n \"samples_ts\": [ 2.75481, 2.17246, 1.89706 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T02:45:15Z", "avg_ns": 149869546011, "stddev_ns": 3814843656, "avg_ts": 4.098589, "stddev_ts": 2.338995, "samples_ns": [ 201971737539, 75432199784, 172204700711 ], "samples_ts": [ 2.53501, 6.78755, 2.97321 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T02:53:52Z", "avg_ns": 230475340571, "stddev_ns": 3854817231, "avg_ts": 2.274775, "stddev_ts": 0.437931, "samples_ns": [ 185857054035, 235677367574, 269891600104 ], "samples_ts": [ 2.75481, 2.17246, 1.89706 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 623 }, { "timestamp_utc": "2025-12-11T03:10:27.536075+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:05:25Z\",\n \"avg_ns\": 24342748877,\n \"stddev_ns\": 4135610344,\n \"avg_ts\": 6.653568,\n \"stddev_ts\": 3.181350,\n \"samples_ns\": [ 14732892646, 15449175496, 42846178490 ],\n \"samples_ts\": [ 8.68804, 8.28523, 2.98743 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:06:52Z\",\n \"avg_ns\": 71255341187,\n \"stddev_ns\": 3712854669,\n \"avg_ts\": 2.234255,\n \"stddev_ts\": 1.415775,\n \"samples_ns\": [ 90625213210, 90057698158, 33083112193 ],\n \"samples_ts\": [ 1.41241, 1.42131, 3.86904 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T03:05:25Z", "avg_ns": 24342748877, "stddev_ns": 4135610344, "avg_ts": 6.653568, "stddev_ts": 3.18135, "samples_ns": [ 14732892646, 15449175496, 42846178490 ], "samples_ts": [ 8.68804, 8.28523, 2.98743 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T03:06:52Z", "avg_ns": 71255341187, "stddev_ns": 3712854669, "avg_ts": 2.234255, "stddev_ts": 1.415775, "samples_ns": [ 90625213210, 90057698158, 33083112193 ], "samples_ts": [ 1.41241, 1.42131, 3.86904 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 624 }, { "timestamp_utc": "2025-12-11T03:24:37.258501+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:10:28Z\",\n \"avg_ns\": 27146114758,\n \"stddev_ns\": 4226486993,\n \"avg_ts\": 6.204613,\n \"stddev_ts\": 3.189912,\n \"samples_ns\": [ 14783592601, 17398554033, 49256197642 ],\n \"samples_ts\": [ 8.65825, 7.35693, 2.59866 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:12:04Z\",\n \"avg_ns\": 250497810882,\n \"stddev_ns\": 4264668079,\n \"avg_ts\": 2.052303,\n \"stddev_ts\": 0.160069,\n \"samples_ns\": [ 231720932772, 270962978307, 248809521568 ],\n \"samples_ts\": [ 2.20955, 1.88956, 2.0578 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T03:10:28Z", "avg_ns": 27146114758, "stddev_ns": 4226486993, "avg_ts": 6.204613, "stddev_ts": 3.189912, "samples_ns": [ 14783592601, 17398554033, 49256197642 ], "samples_ts": [ 8.65825, 7.35693, 2.59866 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T03:12:04Z", "avg_ns": 250497810882, "stddev_ns": 4264668079, "avg_ts": 2.052303, "stddev_ts": 0.160069, "samples_ns": [ 231720932772, 270962978307, 248809521568 ], "samples_ts": [ 2.20955, 1.88956, 2.0578 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 625 }, { "timestamp_utc": "2025-12-11T03:35:44.320801+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:24:38Z\",\n \"avg_ns\": 138392876756,\n \"stddev_ns\": 3869872694,\n \"avg_ts\": 3.788894,\n \"stddev_ts\": 0.690879,\n \"samples_ns\": [ 168388286186, 116297135616, 130493208468 ],\n \"samples_ts\": [ 3.04059, 4.40252, 3.92358 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:32:53Z\",\n \"avg_ns\": 56775917952,\n \"stddev_ns\": 1347832650,\n \"avg_ts\": 3.028189,\n \"stddev_ts\": 2.061946,\n \"samples_ns\": [ 91803882730, 54575962468, 23947908660 ],\n \"samples_ts\": [ 1.39428, 2.34535, 5.34493 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T03:24:38Z", "avg_ns": 138392876756, "stddev_ns": 3869872694, "avg_ts": 3.788894, "stddev_ts": 0.690879, "samples_ns": [ 168388286186, 116297135616, 130493208468 ], "samples_ts": [ 3.04059, 4.40252, 3.92358 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T03:32:53Z", "avg_ns": 56775917952, "stddev_ns": 1347832650, "avg_ts": 3.028189, "stddev_ts": 2.061946, "samples_ns": [ 91803882730, 54575962468, 23947908660 ], "samples_ts": [ 1.39428, 2.34535, 5.34493 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 626 }, { "timestamp_utc": "2025-12-11T03:55:52.865811+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:35:45Z\",\n \"avg_ns\": 144015746744,\n \"stddev_ns\": 2034561447,\n \"avg_ts\": 3.793179,\n \"stddev_ts\": 1.217501,\n \"samples_ns\": [ 147411689297, 99636189461, 184999361475 ],\n \"samples_ts\": [ 3.47327, 5.1387, 2.76758 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:45:14Z\",\n \"avg_ns\": 212678704307,\n \"stddev_ns\": 2225306862,\n \"avg_ts\": 2.434308,\n \"stddev_ts\": 0.310170,\n \"samples_ns\": [ 208056451703, 187522250531, 242457410687 ],\n \"samples_ts\": [ 2.46087, 2.73034, 2.11171 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T03:35:45Z", "avg_ns": 144015746744, "stddev_ns": 2034561447, "avg_ts": 3.793179, "stddev_ts": 1.217501, "samples_ns": [ 147411689297, 99636189461, 184999361475 ], "samples_ts": [ 3.47327, 5.1387, 2.76758 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T03:45:14Z", "avg_ns": 212678704307, "stddev_ns": 2225306862, "avg_ts": 2.434308, "stddev_ts": 0.31017, "samples_ns": [ 208056451703, 187522250531, 242457410687 ], "samples_ts": [ 2.46087, 2.73034, 2.11171 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 627 }, { "timestamp_utc": "2025-12-11T04:00:53.244612+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:55:53Z\",\n \"avg_ns\": 47258106972,\n \"stddev_ns\": 2398054782,\n \"avg_ts\": 2.737920,\n \"stddev_ts\": 0.360380,\n \"samples_ns\": [ 40582751819, 50573247112, 50618321987 ],\n \"samples_ts\": [ 3.15405, 2.53098, 2.52873 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:58:30Z\",\n \"avg_ns\": 47233029766,\n \"stddev_ns\": 3659197379,\n \"avg_ts\": 3.814116,\n \"stddev_ts\": 2.110713,\n \"samples_ns\": [ 90330979973, 27638676597, 23729432730 ],\n \"samples_ts\": [ 1.41701, 4.63119, 5.39414 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T03:55:53Z", "avg_ns": 47258106972, "stddev_ns": 2398054782, "avg_ts": 2.73792, "stddev_ts": 0.36038, "samples_ns": [ 40582751819, 50573247112, 50618321987 ], "samples_ts": [ 3.15405, 2.53098, 2.52873 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T03:58:30Z", "avg_ns": 47233029766, "stddev_ns": 3659197379, "avg_ts": 3.814116, "stddev_ts": 2.110713, "samples_ns": [ 90330979973, 27638676597, 23729432730 ], "samples_ts": [ 1.41701, 4.63119, 5.39414 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 628 }, { "timestamp_utc": "2025-12-11T04:15:21.107059+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:00:54Z\",\n \"avg_ns\": 47745606073,\n \"stddev_ns\": 3932700465,\n \"avg_ts\": 2.701626,\n \"stddev_ts\": 0.299098,\n \"samples_ns\": [ 42009332040, 50718026835, 50509459345 ],\n \"samples_ts\": [ 3.04694, 2.52376, 2.53418 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:03:32Z\",\n \"avg_ns\": 235788531069,\n \"stddev_ns\": 4120582693,\n \"avg_ts\": 2.226508,\n \"stddev_ts\": 0.449072,\n \"samples_ns\": [ 187071846804, 249690428748, 270603317656 ],\n \"samples_ts\": [ 2.73692, 2.05054, 1.89207 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T04:00:54Z", "avg_ns": 47745606073, "stddev_ns": 3932700465, "avg_ts": 2.701626, "stddev_ts": 0.299098, "samples_ns": [ 42009332040, 50718026835, 50509459345 ], "samples_ts": [ 3.04694, 2.52376, 2.53418 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T04:03:32Z", "avg_ns": 235788531069, "stddev_ns": 4120582693, "avg_ts": 2.226508, "stddev_ts": 0.449072, "samples_ns": [ 187071846804, 249690428748, 270603317656 ], "samples_ts": [ 2.73692, 2.05054, 1.89207 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 629 }, { "timestamp_utc": "2025-12-11T04:26:00.004639+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:15:21Z\",\n \"avg_ns\": 149375203363,\n \"stddev_ns\": 3439693801,\n \"avg_ts\": 4.555268,\n \"stddev_ts\": 3.299361,\n \"samples_ns\": [ 201715843833, 61223548283, 185186217974 ],\n \"samples_ts\": [ 2.53822, 8.3628, 2.76478 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:24:10Z\",\n \"avg_ns\": 36183719619,\n \"stddev_ns\": 3741127843,\n \"avg_ts\": 4.474155,\n \"stddev_ts\": 2.134219,\n \"samples_ns\": [ 63688830895, 22431000471, 22431327491 ],\n \"samples_ts\": [ 2.00977, 5.70639, 5.70631 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T04:15:21Z", "avg_ns": 149375203363, "stddev_ns": 3439693801, "avg_ts": 4.555268, "stddev_ts": 3.299361, "samples_ns": [ 201715843833, 61223548283, 185186217974 ], "samples_ts": [ 2.53822, 8.3628, 2.76478 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T04:24:10Z", "avg_ns": 36183719619, "stddev_ns": 3741127843, "avg_ts": 4.474155, "stddev_ts": 2.134219, "samples_ns": [ 63688830895, 22431000471, 22431327491 ], "samples_ts": [ 2.00977, 5.70639, 5.70631 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 630 }, { "timestamp_utc": "2025-12-11T04:45:54.707239+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:26:00Z\",\n \"avg_ns\": 131290070401,\n \"stddev_ns\": 192020584,\n \"avg_ts\": 3.962508,\n \"stddev_ts\": 0.629257,\n \"samples_ns\": [ 109741325631, 135806871669, 148322013905 ],\n \"samples_ts\": [ 4.66552, 3.77006, 3.45195 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:35:29Z\",\n \"avg_ns\": 208380009492,\n \"stddev_ns\": 3574208826,\n \"avg_ts\": 2.496861,\n \"stddev_ts\": 0.403104,\n \"samples_ns\": [ 229425221094, 172929844289, 222784963094 ],\n \"samples_ts\": [ 2.23166, 2.96074, 2.29818 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T04:26:00Z", "avg_ns": 131290070401, "stddev_ns": 192020584, "avg_ts": 3.962508, "stddev_ts": 0.629257, "samples_ns": [ 109741325631, 135806871669, 148322013905 ], "samples_ts": [ 4.66552, 3.77006, 3.45195 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T04:35:29Z", "avg_ns": 208380009492, "stddev_ns": 3574208826, "avg_ts": 2.496861, "stddev_ts": 0.403104, "samples_ns": [ 229425221094, 172929844289, 222784963094 ], "samples_ts": [ 2.23166, 2.96074, 2.29818 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 631 }, { "timestamp_utc": "2025-12-11T04:50:55.171987+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:45:55Z\",\n \"avg_ns\": 48648588122,\n \"stddev_ns\": 3437820353,\n \"avg_ts\": 2.640278,\n \"stddev_ts\": 0.194512,\n \"samples_ns\": [ 44679099714, 50664638366, 50602026287 ],\n \"samples_ts\": [ 2.86487, 2.52642, 2.52954 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:48:37Z\",\n \"avg_ns\": 45730551449,\n \"stddev_ns\": 2280776749,\n \"avg_ts\": 4.072782,\n \"stddev_ts\": 2.292019,\n \"samples_ns\": [ 89749810108, 23719286074, 23722558166 ],\n \"samples_ts\": [ 1.42619, 5.39645, 5.39571 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T04:45:55Z", "avg_ns": 48648588122, "stddev_ns": 3437820353, "avg_ts": 2.640278, "stddev_ts": 0.194512, "samples_ns": [ 44679099714, 50664638366, 50602026287 ], "samples_ts": [ 2.86487, 2.52642, 2.52954 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T04:48:37Z", "avg_ns": 45730551449, "stddev_ns": 2280776749, "avg_ts": 4.072782, "stddev_ts": 2.292019, "samples_ns": [ 89749810108, 23719286074, 23722558166 ], "samples_ts": [ 1.42619, 5.39645, 5.39571 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 632 }, { "timestamp_utc": "2025-12-11T05:05:20.734942+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:50:55Z\",\n \"avg_ns\": 49365137653,\n \"stddev_ns\": 4088939795,\n \"avg_ts\": 2.598428,\n \"stddev_ts\": 0.148882,\n \"samples_ns\": [ 46203693644, 50942634924, 50949084392 ],\n \"samples_ts\": [ 2.77034, 2.51263, 2.51231 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:53:40Z\",\n \"avg_ns\": 233259285466,\n \"stddev_ns\": 740795531,\n \"avg_ts\": 2.252090,\n \"stddev_ts\": 0.458553,\n \"samples_ns\": [ 184833126024, 245185498177, 269759232197 ],\n \"samples_ts\": [ 2.77007, 2.08821, 1.89799 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T04:50:55Z", "avg_ns": 49365137653, "stddev_ns": 4088939795, "avg_ts": 2.598428, "stddev_ts": 0.148882, "samples_ns": [ 46203693644, 50942634924, 50949084392 ], "samples_ts": [ 2.77034, 2.51263, 2.51231 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T04:53:40Z", "avg_ns": 233259285466, "stddev_ns": 740795531, "avg_ts": 2.25209, "stddev_ts": 0.458553, "samples_ns": [ 184833126024, 245185498177, 269759232197 ], "samples_ts": [ 2.77007, 2.08821, 1.89799 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 633 }, { "timestamp_utc": "2025-12-11T05:16:04.166818+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:05:21Z\",\n \"avg_ns\": 149818581561,\n \"stddev_ns\": 2261091159,\n \"avg_ts\": 4.520427,\n \"stddev_ts\": 3.246274,\n \"samples_ns\": [ 202093224473, 61936016904, 185426503306 ],\n \"samples_ts\": [ 2.53348, 8.2666, 2.7612 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:14:11Z\",\n \"avg_ns\": 37435876239,\n \"stddev_ns\": 4250329280,\n \"avg_ts\": 4.251875,\n \"stddev_ts\": 1.971981,\n \"samples_ns\": [ 64815777546, 23744364178, 23747486995 ],\n \"samples_ts\": [ 1.97483, 5.39075, 5.39004 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T05:05:21Z", "avg_ns": 149818581561, "stddev_ns": 2261091159, "avg_ts": 4.520427, "stddev_ts": 3.246274, "samples_ns": [ 202093224473, 61936016904, 185426503306 ], "samples_ts": [ 2.53348, 8.2666, 2.7612 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T05:14:11Z", "avg_ns": 37435876239, "stddev_ns": 4250329280, "avg_ts": 4.251875, "stddev_ts": 1.971981, "samples_ns": [ 64815777546, 23744364178, 23747486995 ], "samples_ts": [ 1.97483, 5.39075, 5.39004 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 634 }, { "timestamp_utc": "2025-12-11T05:36:00.763051+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:16:05Z\",\n \"avg_ns\": 128208023593,\n \"stddev_ns\": 282319499,\n \"avg_ts\": 4.105766,\n \"stddev_ts\": 0.876242,\n \"samples_ns\": [ 100228068508, 146852444569, 137543557703 ],\n \"samples_ts\": [ 5.10835, 3.48649, 3.72246 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:25:34Z\",\n \"avg_ns\": 208816156525,\n \"stddev_ns\": 1943001344,\n \"avg_ts\": 2.505413,\n \"stddev_ts\": 0.447839,\n \"samples_ns\": [ 247983507860, 172972403431, 205492558284 ],\n \"samples_ts\": [ 2.06465, 2.96001, 2.49157 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T05:16:05Z", "avg_ns": 128208023593, "stddev_ns": 282319499, "avg_ts": 4.105766, "stddev_ts": 0.876242, "samples_ns": [ 100228068508, 146852444569, 137543557703 ], "samples_ts": [ 5.10835, 3.48649, 3.72246 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T05:25:34Z", "avg_ns": 208816156525, "stddev_ns": 1943001344, "avg_ts": 2.505413, "stddev_ts": 0.447839, "samples_ns": [ 247983507860, 172972403431, 205492558284 ], "samples_ts": [ 2.06465, 2.96001, 2.49157 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 635 }, { "timestamp_utc": "2025-12-11T05:41:00.009530+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:36:01Z\",\n \"avg_ns\": 50479254160,\n \"stddev_ns\": 41393706,\n \"avg_ts\": 2.535696,\n \"stddev_ts\": 0.002079,\n \"samples_ns\": [ 50470849702, 50524204546, 50442708233 ],\n \"samples_ts\": [ 2.53612, 2.53344, 2.53753 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:38:57Z\",\n \"avg_ns\": 40618357628,\n \"stddev_ns\": 4177192517,\n \"avg_ts\": 4.183458,\n \"stddev_ts\": 2.137016,\n \"samples_ns\": [ 74598724221, 23641567320, 23614781343 ],\n \"samples_ts\": [ 1.71585, 5.41419, 5.42033 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T05:36:01Z", "avg_ns": 50479254160, "stddev_ns": 41393706, "avg_ts": 2.535696, "stddev_ts": 0.002079, "samples_ns": [ 50470849702, 50524204546, 50442708233 ], "samples_ts": [ 2.53612, 2.53344, 2.53753 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T05:38:57Z", "avg_ns": 40618357628, "stddev_ns": 4177192517, "avg_ts": 4.183458, "stddev_ts": 2.137016, "samples_ns": [ 74598724221, 23641567320, 23614781343 ], "samples_ts": [ 1.71585, 5.41419, 5.42033 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 636 }, { "timestamp_utc": "2025-12-11T05:55:26.265204+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:41:00Z\",\n \"avg_ns\": 50917986730,\n \"stddev_ns\": 35882900,\n \"avg_ts\": 2.513847,\n \"stddev_ts\": 0.001771,\n \"samples_ns\": [ 50958828908, 50903604603, 50891526680 ],\n \"samples_ts\": [ 2.51183, 2.51456, 2.51515 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:43:56Z\",\n \"avg_ns\": 229678521223,\n \"stddev_ns\": 2480515687,\n \"avg_ts\": 2.280750,\n \"stddev_ts\": 0.427922,\n \"samples_ns\": [ 186759551415, 232200782896, 270075229359 ],\n \"samples_ts\": [ 2.74149, 2.20499, 1.89577 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T05:41:00Z", "avg_ns": 50917986730, "stddev_ns": 35882900, "avg_ts": 2.513847, "stddev_ts": 0.001771, "samples_ns": [ 50958828908, 50903604603, 50891526680 ], "samples_ts": [ 2.51183, 2.51456, 2.51515 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T05:43:56Z", "avg_ns": 229678521223, "stddev_ns": 2480515687, "avg_ts": 2.28075, "stddev_ts": 0.427922, "samples_ns": [ 186759551415, 232200782896, 270075229359 ], "samples_ts": [ 2.74149, 2.20499, 1.89577 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 637 }, { "timestamp_utc": "2025-12-11T06:06:10.039277+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:55:27Z\",\n \"avg_ns\": 150244496012,\n \"stddev_ns\": 4227324849,\n \"avg_ts\": 4.582318,\n \"stddev_ts\": 3.394085,\n \"samples_ns\": [ 192174438372, 60226943230, 198332106434 ],\n \"samples_ts\": [ 2.66425, 8.50118, 2.58153 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:04:30Z\",\n \"avg_ns\": 32877976953,\n \"stddev_ns\": 4101964900,\n \"avg_ts\": 4.382379,\n \"stddev_ts\": 1.594718,\n \"samples_ns\": [ 50347366355, 23862215834, 24424348670 ],\n \"samples_ts\": [ 2.54234, 5.36413, 5.24067 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T05:55:27Z", "avg_ns": 150244496012, "stddev_ns": 4227324849, "avg_ts": 4.582318, "stddev_ts": 3.394085, "samples_ns": [ 192174438372, 60226943230, 198332106434 ], "samples_ts": [ 2.66425, 8.50118, 2.58153 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T06:04:30Z", "avg_ns": 32877976953, "stddev_ns": 4101964900, "avg_ts": 4.382379, "stddev_ts": 1.594718, "samples_ns": [ 50347366355, 23862215834, 24424348670 ], "samples_ts": [ 2.54234, 5.36413, 5.24067 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 638 }, { "timestamp_utc": "2025-12-11T06:26:59.058208+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:06:10Z\",\n \"avg_ns\": 125825568828,\n \"stddev_ns\": 1325022270,\n \"avg_ts\": 4.437579,\n \"stddev_ts\": 1.497742,\n \"samples_ns\": [ 87493770366, 178496591024, 111486345094 ],\n \"samples_ts\": [ 5.85185, 2.8684, 4.59249 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:15:44Z\",\n \"avg_ns\": 224596510764,\n \"stddev_ns\": 975633322,\n \"avg_ts\": 2.331631,\n \"stddev_ts\": 0.416817,\n \"samples_ns\": [ 270951706130, 214436576475, 188401249688 ],\n \"samples_ts\": [ 1.88964, 2.38765, 2.7176 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T06:06:10Z", "avg_ns": 125825568828, "stddev_ns": 1325022270, "avg_ts": 4.437579, "stddev_ts": 1.497742, "samples_ns": [ 87493770366, 178496591024, 111486345094 ], "samples_ts": [ 5.85185, 2.8684, 4.59249 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T06:15:44Z", "avg_ns": 224596510764, "stddev_ns": 975633322, "avg_ts": 2.331631, "stddev_ts": 0.416817, "samples_ns": [ 270951706130, 214436576475, 188401249688 ], "samples_ts": [ 1.88964, 2.38765, 2.7176 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 639 }, { "timestamp_utc": "2025-12-11T06:31:55.648087+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:27:00Z\",\n \"avg_ns\": 46914909371,\n \"stddev_ns\": 4107812918,\n \"avg_ts\": 2.769045,\n \"stddev_ts\": 0.429155,\n \"samples_ns\": [ 50683872148, 50851976980, 39208878986 ],\n \"samples_ts\": [ 2.52546, 2.51711, 3.26457 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:30:12Z\",\n \"avg_ns\": 34351142337,\n \"stddev_ns\": 434982969,\n \"avg_ts\": 4.351505,\n \"stddev_ts\": 1.767674,\n \"samples_ns\": [ 23719869740, 23935833472, 55397723799 ],\n \"samples_ts\": [ 5.39632, 5.34763, 2.31056 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T06:27:00Z", "avg_ns": 46914909371, "stddev_ns": 4107812918, "avg_ts": 2.769045, "stddev_ts": 0.429155, "samples_ns": [ 50683872148, 50851976980, 39208878986 ], "samples_ts": [ 2.52546, 2.51711, 3.26457 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T06:30:12Z", "avg_ns": 34351142337, "stddev_ns": 434982969, "avg_ts": 4.351505, "stddev_ts": 1.767674, "samples_ns": [ 23719869740, 23935833472, 55397723799 ], "samples_ts": [ 5.39632, 5.34763, 2.31056 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 640 }, { "timestamp_utc": "2025-12-11T06:45:51.199405+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:31:57Z\",\n \"avg_ns\": 47681980319,\n \"stddev_ns\": 3700320709,\n \"avg_ts\": 2.711892,\n \"stddev_ts\": 0.346187,\n \"samples_ns\": [ 51004570617, 50905294951, 41136075389 ],\n \"samples_ts\": [ 2.50958, 2.51447, 3.11162 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:35:11Z\",\n \"avg_ns\": 213136154687,\n \"stddev_ns\": 3885954179,\n \"avg_ts\": 2.435895,\n \"stddev_ts\": 0.338896,\n \"samples_ns\": [ 200367218241, 189705517403, 249335728417 ],\n \"samples_ts\": [ 2.55531, 2.69892, 2.05346 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T06:31:57Z", "avg_ns": 47681980319, "stddev_ns": 3700320709, "avg_ts": 2.711892, "stddev_ts": 0.346187, "samples_ns": [ 51004570617, 50905294951, 41136075389 ], "samples_ts": [ 2.50958, 2.51447, 3.11162 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T06:35:11Z", "avg_ns": 213136154687, "stddev_ns": 3885954179, "avg_ts": 2.435895, "stddev_ts": 0.338896, "samples_ns": [ 200367218241, 189705517403, 249335728417 ], "samples_ts": [ 2.55531, 2.69892, 2.05346 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 641 }, { "timestamp_utc": "2025-12-11T06:57:15.456355+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:45:52Z\",\n \"avg_ns\": 138668392106,\n \"stddev_ns\": 1848158513,\n \"avg_ts\": 3.796989,\n \"stddev_ts\": 0.754593,\n \"samples_ns\": [ 131838734023, 113662113271, 170504329025 ],\n \"samples_ts\": [ 3.88353, 4.50458, 3.00286 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:55:20Z\",\n \"avg_ns\": 38233950526,\n \"stddev_ns\": 4079803435,\n \"avg_ts\": 4.350320,\n \"stddev_ts\": 2.160184,\n \"samples_ns\": [ 22857324224, 22877367900, 68967159456 ],\n \"samples_ts\": [ 5.59996, 5.59505, 1.85596 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T06:45:52Z", "avg_ns": 138668392106, "stddev_ns": 1848158513, "avg_ts": 3.796989, "stddev_ts": 0.754593, "samples_ns": [ 131838734023, 113662113271, 170504329025 ], "samples_ts": [ 3.88353, 4.50458, 3.00286 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T06:55:20Z", "avg_ns": 38233950526, "stddev_ns": 4079803435, "avg_ts": 4.35032, "stddev_ts": 2.160184, "samples_ns": [ 22857324224, 22877367900, 68967159456 ], "samples_ts": [ 5.59996, 5.59505, 1.85596 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 642 }, { "timestamp_utc": "2025-12-11T07:17:56.693672+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:57:16Z\",\n \"avg_ns\": 114519074035,\n \"stddev_ns\": 3887255302,\n \"avg_ts\": 5.693681,\n \"stddev_ts\": 2.772594,\n \"samples_ns\": [ 66457050401, 202312899732, 74787271974 ],\n \"samples_ts\": [ 7.70422, 2.53073, 6.84608 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:06:00Z\",\n \"avg_ns\": 238655835807,\n \"stddev_ns\": 3651353183,\n \"avg_ts\": 2.198358,\n \"stddev_ts\": 0.439636,\n \"samples_ns\": [ 270552245827, 255937051950, 189478209645 ],\n \"samples_ts\": [ 1.89243, 2.00049, 2.70216 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T06:57:16Z", "avg_ns": 114519074035, "stddev_ns": 3887255302, "avg_ts": 5.693681, "stddev_ts": 2.772594, "samples_ns": [ 66457050401, 202312899732, 74787271974 ], "samples_ts": [ 7.70422, 2.53073, 6.84608 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T07:06:00Z", "avg_ns": 238655835807, "stddev_ns": 3651353183, "avg_ts": 2.198358, "stddev_ts": 0.439636, "samples_ns": [ 270552245827, 255937051950, 189478209645 ], "samples_ts": [ 1.89243, 2.00049, 2.70216 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 643 }, { "timestamp_utc": "2025-12-11T07:22:58.828622+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:17:58Z\",\n \"avg_ns\": 33289266138,\n \"stddev_ns\": 4183598919,\n \"avg_ts\": 4.967186,\n \"stddev_ts\": 3.252531,\n \"samples_ns\": [ 50601750168, 34486054097, 14779994149 ],\n \"samples_ts\": [ 2.52956, 3.71165, 8.66036 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:20:28Z\",\n \"avg_ns\": 49833720900,\n \"stddev_ns\": 3506003057,\n \"avg_ts\": 3.466534,\n \"stddev_ts\": 1.978609,\n \"samples_ns\": [ 23846706548, 35423695526, 90230760626 ],\n \"samples_ts\": [ 5.36762, 3.6134, 1.41858 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T07:17:58Z", "avg_ns": 33289266138, "stddev_ns": 4183598919, "avg_ts": 4.967186, "stddev_ts": 3.252531, "samples_ns": [ 50601750168, 34486054097, 14779994149 ], "samples_ts": [ 2.52956, 3.71165, 8.66036 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T07:20:28Z", "avg_ns": 49833720900, "stddev_ns": 3506003057, "avg_ts": 3.466534, "stddev_ts": 1.978609, "samples_ns": [ 23846706548, 35423695526, 90230760626 ], "samples_ts": [ 5.36762, 3.6134, 1.41858 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 644 }, { "timestamp_utc": "2025-12-11T07:36:10.978509+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:23:00Z\",\n \"avg_ns\": 32826261882,\n \"stddev_ns\": 4231125906,\n \"avg_ts\": 5.014688,\n \"stddev_ts\": 3.206875,\n \"samples_ns\": [ 50730696093, 32920985162, 14827104391 ],\n \"samples_ts\": [ 2.52313, 3.8881, 8.63284 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:25:29Z\",\n \"avg_ns\": 213675429243,\n \"stddev_ns\": 4061683880,\n \"avg_ts\": 2.434074,\n \"stddev_ts\": 0.358611,\n \"samples_ns\": [ 252328629090, 188833428838, 199864229802 ],\n \"samples_ts\": [ 2.0291, 2.71138, 2.56174 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T07:23:00Z", "avg_ns": 32826261882, "stddev_ns": 4231125906, "avg_ts": 5.014688, "stddev_ts": 3.206875, "samples_ns": [ 50730696093, 32920985162, 14827104391 ], "samples_ts": [ 2.52313, 3.8881, 8.63284 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T07:25:29Z", "avg_ns": 213675429243, "stddev_ns": 4061683880, "avg_ts": 2.434074, "stddev_ts": 0.358611, "samples_ns": [ 252328629090, 188833428838, 199864229802 ], "samples_ts": [ 2.0291, 2.71138, 2.56174 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 645 }, { "timestamp_utc": "2025-12-11T07:48:45.153392+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:36:11Z\",\n \"avg_ns\": 123722291827,\n \"stddev_ns\": 2026332419,\n \"avg_ts\": 4.423013,\n \"stddev_ts\": 1.408796,\n \"samples_ns\": [ 86223168176, 162410189263, 122533518042 ],\n \"samples_ts\": [ 5.93808, 3.15251, 4.17845 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:45:40Z\",\n \"avg_ns\": 61263382270,\n \"stddev_ns\": 3946714855,\n \"avg_ts\": 2.879556,\n \"stddev_ts\": 2.176333,\n \"samples_ns\": [ 23792569719, 69253942440, 90743634651 ],\n \"samples_ts\": [ 5.37983, 1.84827, 1.41057 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T07:36:11Z", "avg_ns": 123722291827, "stddev_ns": 2026332419, "avg_ts": 4.423013, "stddev_ts": 1.408796, "samples_ns": [ 86223168176, 162410189263, 122533518042 ], "samples_ts": [ 5.93808, 3.15251, 4.17845 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T07:45:40Z", "avg_ns": 61263382270, "stddev_ns": 3946714855, "avg_ts": 2.879556, "stddev_ts": 2.176333, "samples_ns": [ 23792569719, 69253942440, 90743634651 ], "samples_ts": [ 5.37983, 1.84827, 1.41057 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 646 }, { "timestamp_utc": "2025-12-11T08:09:20.012129+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:48:46Z\",\n \"avg_ns\": 126728951964,\n \"stddev_ns\": 3476298254,\n \"avg_ts\": 4.198630,\n \"stddev_ts\": 1.044626,\n \"samples_ns\": [ 132065922574, 152710522352, 95410410968 ],\n \"samples_ts\": [ 3.87685, 3.35275, 5.36629 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:57:03Z\",\n \"avg_ns\": 245121915567,\n \"stddev_ns\": 1521927640,\n \"avg_ts\": 2.098638,\n \"stddev_ts\": 0.172235,\n \"samples_ns\": [ 231633918286, 269432745682, 234299082735 ],\n \"samples_ts\": [ 2.21038, 1.90029, 2.18524 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T07:48:46Z", "avg_ns": 126728951964, "stddev_ns": 3476298254, "avg_ts": 4.19863, "stddev_ts": 1.044626, "samples_ns": [ 132065922574, 152710522352, 95410410968 ], "samples_ts": [ 3.87685, 3.35275, 5.36629 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_type": "gemma3 4B Q2_K - Medium", "model_size": 1722623232, "model_n_params": 3880263168, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T07:57:03Z", "avg_ns": 245121915567, "stddev_ns": 1521927640, "avg_ts": 2.098638, "stddev_ts": 0.172235, "samples_ns": [ 231633918286, 269432745682, 234299082735 ], "samples_ts": [ 2.21038, 1.90029, 2.18524 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-4B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 647 }, { "timestamp_utc": "2025-12-11T08:26:35.911272+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T08:09:57Z\",\n \"avg_ns\": 128702126225,\n \"stddev_ns\": 4175189081,\n \"avg_ts\": 1.002694,\n \"stddev_ts\": 0.108400,\n \"samples_ns\": [ 144953224225, 124188969367, 116964185083 ],\n \"samples_ts\": [ 0.883043, 1.03069, 1.09435 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T08:18:05Z\",\n \"avg_ns\": 169642853490,\n \"stddev_ns\": 2369381036,\n \"avg_ts\": 0.756759,\n \"stddev_ts\": 0.050736,\n \"samples_ns\": [ 180017827169, 171147731220, 157763002083 ],\n \"samples_ts\": [ 0.711041, 0.747892, 0.811344 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T08:09:57Z", "avg_ns": 128702126225, "stddev_ns": 4175189081, "avg_ts": 1.002694, "stddev_ts": 0.1084, "samples_ns": [ 144953224225, 124188969367, 116964185083 ], "samples_ts": [ 0.883043, 1.03069, 1.09435 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T08:18:05Z", "avg_ns": 169642853490, "stddev_ns": 2369381036, "avg_ts": 0.756759, "stddev_ts": 0.050736, "samples_ns": [ 180017827169, 171147731220, 157763002083 ], "samples_ts": [ 0.711041, 0.747892, 0.811344 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 648 }, { "timestamp_utc": "2025-12-11T09:10:59.503908+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T08:26:37Z\",\n \"avg_ns\": 131381880652,\n \"stddev_ns\": 2631910218,\n \"avg_ts\": 0.988409,\n \"stddev_ts\": 0.142520,\n \"samples_ns\": [ 127294046356, 114124180955, 152727414646 ],\n \"samples_ts\": [ 1.00555, 1.12159, 0.838094 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T08:35:30Z\",\n \"avg_ns\": 709135124910,\n \"stddev_ns\": 4294193782,\n \"avg_ts\": 0.722633,\n \"stddev_ts\": 0.026064,\n \"samples_ns\": [ 683825831211, 734997061770, 708582481750 ],\n \"samples_ts\": [ 0.748729, 0.696601, 0.722569 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T08:26:37Z", "avg_ns": 131381880652, "stddev_ns": 2631910218, "avg_ts": 0.988409, "stddev_ts": 0.14252, "samples_ns": [ 127294046356, 114124180955, 152727414646 ], "samples_ts": [ 1.00555, 1.12159, 0.838094 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T08:35:30Z", "avg_ns": 709135124910, "stddev_ns": 4294193782, "avg_ts": 0.722633, "stddev_ts": 0.026064, "samples_ns": [ 683825831211, 734997061770, 708582481750 ], "samples_ts": [ 0.748729, 0.696601, 0.722569 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 649 }, { "timestamp_utc": "2025-12-11T09:53:29.428687+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T09:11:00Z\",\n \"avg_ns\": 513645990250,\n \"stddev_ns\": 1657075855,\n \"avg_ts\": 0.998439,\n \"stddev_ts\": 0.050084,\n \"samples_ns\": [ 485701928294, 520303267508, 534932774948 ],\n \"samples_ts\": [ 1.05414, 0.984041, 0.95713 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T09:45:28Z\",\n \"avg_ns\": 159997195438,\n \"stddev_ns\": 2109821286,\n \"avg_ts\": 0.812608,\n \"stddev_ts\": 0.122489,\n \"samples_ns\": [ 137531306776, 186548650960, 155911628579 ],\n \"samples_ts\": [ 0.930697, 0.686148, 0.820978 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T09:11:00Z", "avg_ns": 513645990250, "stddev_ns": 1657075855, "avg_ts": 0.998439, "stddev_ts": 0.050084, "samples_ns": [ 485701928294, 520303267508, 534932774948 ], "samples_ts": [ 1.05414, 0.984041, 0.95713 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T09:45:28Z", "avg_ns": 159997195438, "stddev_ns": 2109821286, "avg_ts": 0.812608, "stddev_ts": 0.122489, "samples_ns": [ 137531306776, 186548650960, 155911628579 ], "samples_ts": [ 0.930697, 0.686148, 0.820978 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 650 }, { "timestamp_utc": "2025-12-11T11:03:13.864572+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T09:53:31Z\",\n \"avg_ns\": 511640724724,\n \"stddev_ns\": 3815490425,\n \"avg_ts\": 1.002264,\n \"stddev_ts\": 0.048742,\n \"samples_ns\": [ 534172135170, 515345170971, 485404868031 ],\n \"samples_ts\": [ 0.958493, 0.993509, 1.05479 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T10:27:29Z\",\n \"avg_ns\": 714188458371,\n \"stddev_ns\": 4287179111,\n \"avg_ts\": 0.717880,\n \"stddev_ts\": 0.032312,\n \"samples_ns\": [ 749659248510, 685636953586, 707269173018 ],\n \"samples_ts\": [ 0.682977, 0.746751, 0.723911 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T09:53:31Z", "avg_ns": 511640724724, "stddev_ns": 3815490425, "avg_ts": 1.002264, "stddev_ts": 0.048742, "samples_ns": [ 534172135170, 515345170971, 485404868031 ], "samples_ts": [ 0.958493, 0.993509, 1.05479 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T10:27:29Z", "avg_ns": 714188458371, "stddev_ns": 4287179111, "avg_ts": 0.71788, "stddev_ts": 0.032312, "samples_ns": [ 749659248510, 685636953586, 707269173018 ], "samples_ts": [ 0.682977, 0.746751, 0.723911 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 651 }, { "timestamp_utc": "2025-12-11T11:20:33.087838+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T11:03:15Z\",\n \"avg_ns\": 120920870097,\n \"stddev_ns\": 3807252233,\n \"avg_ts\": 1.136627,\n \"stddev_ts\": 0.332620,\n \"samples_ns\": [ 96274888157, 170087916182, 96399805953 ],\n \"samples_ts\": [ 1.32953, 0.752552, 1.3278 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T11:11:43Z\",\n \"avg_ns\": 176085529551,\n \"stddev_ns\": 1832872339,\n \"avg_ts\": 0.760377,\n \"stddev_ts\": 0.210354,\n \"samples_ns\": [ 199428097208, 127585359217, 201243132229 ],\n \"samples_ts\": [ 0.641835, 1.00325, 0.636047 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T11:03:15Z", "avg_ns": 120920870097, "stddev_ns": 3807252233, "avg_ts": 1.136627, "stddev_ts": 0.33262, "samples_ns": [ 96274888157, 170087916182, 96399805953 ], "samples_ts": [ 1.32953, 0.752552, 1.3278 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T11:11:43Z", "avg_ns": 176085529551, "stddev_ns": 1832872339, "avg_ts": 0.760377, "stddev_ts": 0.210354, "samples_ns": [ 199428097208, 127585359217, 201243132229 ], "samples_ts": [ 0.641835, 1.00325, 0.636047 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 652 }, { "timestamp_utc": "2025-12-11T12:04:19.201657+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T11:20:34Z\",\n \"avg_ns\": 134925043896,\n \"stddev_ns\": 4235108875,\n \"avg_ts\": 0.987433,\n \"stddev_ts\": 0.248287,\n \"samples_ns\": [ 164933290030, 101517410298, 138324431362 ],\n \"samples_ts\": [ 0.776071, 1.26087, 0.925361 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T11:28:55Z\",\n \"avg_ns\": 707262028836,\n \"stddev_ns\": 3832456312,\n \"avg_ts\": 0.724624,\n \"stddev_ts\": 0.027827,\n \"samples_ns\": [ 711164100209, 678600176127, 732021810174 ],\n \"samples_ts\": [ 0.719946, 0.754494, 0.699433 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T11:20:34Z", "avg_ns": 134925043896, "stddev_ns": 4235108875, "avg_ts": 0.987433, "stddev_ts": 0.248287, "samples_ns": [ 164933290030, 101517410298, 138324431362 ], "samples_ts": [ 0.776071, 1.26087, 0.925361 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T11:28:55Z", "avg_ns": 707262028836, "stddev_ns": 3832456312, "avg_ts": 0.724624, "stddev_ts": 0.027827, "samples_ns": [ 711164100209, 678600176127, 732021810174 ], "samples_ts": [ 0.719946, 0.754494, 0.699433 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 653 }, { "timestamp_utc": "2025-12-11T12:46:38.933281+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T12:04:21Z\",\n \"avg_ns\": 517968325646,\n \"stddev_ns\": 2176125244,\n \"avg_ts\": 0.990170,\n \"stddev_ts\": 0.050838,\n \"samples_ns\": [ 530120867549, 535515244598, 488268864792 ],\n \"samples_ts\": [ 0.965817, 0.956089, 1.0486 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T12:38:20Z\",\n \"avg_ns\": 165342210986,\n \"stddev_ns\": 3839120142,\n \"avg_ts\": 0.776994,\n \"stddev_ts\": 0.058773,\n \"samples_ns\": [ 171359773715, 173136566015, 151530293229 ],\n \"samples_ts\": [ 0.746966, 0.739301, 0.844716 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T12:04:21Z", "avg_ns": 517968325646, "stddev_ns": 2176125244, "avg_ts": 0.99017, "stddev_ts": 0.050838, "samples_ns": [ 530120867549, 535515244598, 488268864792 ], "samples_ts": [ 0.965817, 0.956089, 1.0486 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T12:38:20Z", "avg_ns": 165342210986, "stddev_ns": 3839120142, "avg_ts": 0.776994, "stddev_ts": 0.058773, "samples_ns": [ 171359773715, 173136566015, 151530293229 ], "samples_ts": [ 0.746966, 0.739301, 0.844716 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 654 }, { "timestamp_utc": "2025-12-11T13:55:18.908620+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T12:46:40Z\",\n \"avg_ns\": 509722745157,\n \"stddev_ns\": 2726071503,\n \"avg_ts\": 1.005776,\n \"stddev_ts\": 0.044099,\n \"samples_ns\": [ 503997196418, 490446628496, 534724410558 ],\n \"samples_ts\": [ 1.01588, 1.04395, 0.957503 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T13:21:03Z\",\n \"avg_ns\": 684638533145,\n \"stddev_ns\": 4251035505,\n \"avg_ts\": 0.748369,\n \"stddev_ts\": 0.024607,\n \"samples_ns\": [ 659187071577, 695742350001, 698986177857 ],\n \"samples_ts\": [ 0.776714, 0.735905, 0.732489 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T12:46:40Z", "avg_ns": 509722745157, "stddev_ns": 2726071503, "avg_ts": 1.005776, "stddev_ts": 0.044099, "samples_ns": [ 503997196418, 490446628496, 534724410558 ], "samples_ts": [ 1.01588, 1.04395, 0.957503 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T13:21:03Z", "avg_ns": 684638533145, "stddev_ns": 4251035505, "avg_ts": 0.748369, "stddev_ts": 0.024607, "samples_ns": [ 659187071577, 695742350001, 698986177857 ], "samples_ts": [ 0.776714, 0.735905, 0.732489 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 655 }, { "timestamp_utc": "2025-12-11T14:11:47.346775+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T13:55:19Z\",\n \"avg_ns\": 131300331478,\n \"stddev_ns\": 3742408715,\n \"avg_ts\": 0.990740,\n \"stddev_ts\": 0.151486,\n \"samples_ns\": [ 153660382505, 112766500609, 127474111320 ],\n \"samples_ts\": [ 0.833006, 1.13509, 1.00413 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:03:30Z\",\n \"avg_ns\": 165051237166,\n \"stddev_ns\": 2295343619,\n \"avg_ts\": 0.781079,\n \"stddev_ts\": 0.082106,\n \"samples_ns\": [ 168295517107, 179984330478, 146873863914 ],\n \"samples_ts\": [ 0.760567, 0.711173, 0.871496 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T13:55:19Z", "avg_ns": 131300331478, "stddev_ns": 3742408715, "avg_ts": 0.99074, "stddev_ts": 0.151486, "samples_ns": [ 153660382505, 112766500609, 127474111320 ], "samples_ts": [ 0.833006, 1.13509, 1.00413 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T14:03:30Z", "avg_ns": 165051237166, "stddev_ns": 2295343619, "avg_ts": 0.781079, "stddev_ts": 0.082106, "samples_ns": [ 168295517107, 179984330478, 146873863914 ], "samples_ts": [ 0.760567, 0.711173, 0.871496 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 656 }, { "timestamp_utc": "2025-12-11T14:55:30.841378+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:11:48Z\",\n \"avg_ns\": 128322107623,\n \"stddev_ns\": 3721064131,\n \"avg_ts\": 1.005110,\n \"stddev_ts\": 0.104180,\n \"samples_ns\": [ 118614749714, 121878036335, 144473536822 ],\n \"samples_ts\": [ 1.07912, 1.05023, 0.885975 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:20:41Z\",\n \"avg_ns\": 695949524315,\n \"stddev_ns\": 2209763256,\n \"avg_ts\": 0.736032,\n \"stddev_ts\": 0.019704,\n \"samples_ns\": [ 674823269329, 705034322257, 707990981361 ],\n \"samples_ts\": [ 0.758717, 0.726206, 0.723173 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T14:11:48Z", "avg_ns": 128322107623, "stddev_ns": 3721064131, "avg_ts": 1.00511, "stddev_ts": 0.10418, "samples_ns": [ 118614749714, 121878036335, 144473536822 ], "samples_ts": [ 1.07912, 1.05023, 0.885975 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T14:20:41Z", "avg_ns": 695949524315, "stddev_ns": 2209763256, "avg_ts": 0.736032, "stddev_ts": 0.019704, "samples_ns": [ 674823269329, 705034322257, 707990981361 ], "samples_ts": [ 0.758717, 0.726206, 0.723173 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 657 }, { "timestamp_utc": "2025-12-11T15:37:35.568382+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:55:31Z\",\n \"avg_ns\": 519670420991,\n \"stddev_ns\": 4056098893,\n \"avg_ts\": 0.986617,\n \"stddev_ts\": 0.045714,\n \"samples_ns\": [ 492721372935, 535737072143, 530552817895 ],\n \"samples_ts\": [ 1.03913, 0.955693, 0.965031 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T15:29:56Z\",\n \"avg_ns\": 152323233429,\n \"stddev_ns\": 2268695219,\n \"avg_ts\": 0.874929,\n \"stddev_ts\": 0.198847,\n \"samples_ns\": [ 127784821213, 198218966870, 130965912206 ],\n \"samples_ts\": [ 1.00168, 0.645751, 0.977354 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T14:55:31Z", "avg_ns": 519670420991, "stddev_ns": 4056098893, "avg_ts": 0.986617, "stddev_ts": 0.045714, "samples_ns": [ 492721372935, 535737072143, 530552817895 ], "samples_ts": [ 1.03913, 0.955693, 0.965031 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T15:29:56Z", "avg_ns": 152323233429, "stddev_ns": 2268695219, "avg_ts": 0.874929, "stddev_ts": 0.198847, "samples_ns": [ 127784821213, 198218966870, 130965912206 ], "samples_ts": [ 1.00168, 0.645751, 0.977354 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 658 }, { "timestamp_utc": "2025-12-11T16:45:24.295728+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T15:37:37Z\",\n \"avg_ns\": 511389065987,\n \"stddev_ns\": 3327876219,\n \"avg_ts\": 1.002350,\n \"stddev_ts\": 0.041177,\n \"samples_ns\": [ 536233478163, 498672959138, 499260760660 ],\n \"samples_ts\": [ 0.954808, 1.02673, 1.02552 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T16:12:01Z\",\n \"avg_ns\": 667157147430,\n \"stddev_ns\": 3689552768,\n \"avg_ts\": 0.767514,\n \"stddev_ts\": 0.009486,\n \"samples_ns\": [ 662787633835, 676733354947, 661950453510 ],\n \"samples_ts\": [ 0.772495, 0.756576, 0.773472 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T15:37:37Z", "avg_ns": 511389065987, "stddev_ns": 3327876219, "avg_ts": 1.00235, "stddev_ts": 0.041177, "samples_ns": [ 536233478163, 498672959138, 499260760660 ], "samples_ts": [ 0.954808, 1.02673, 1.02552 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T16:12:01Z", "avg_ns": 667157147430, "stddev_ns": 3689552768, "avg_ts": 0.767514, "stddev_ts": 0.009486, "samples_ns": [ 662787633835, 676733354947, 661950453510 ], "samples_ts": [ 0.772495, 0.756576, 0.773472 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 659 }, { "timestamp_utc": "2025-12-11T17:01:33.912045+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T16:45:25Z\",\n \"avg_ns\": 132017008472,\n \"stddev_ns\": 4165598947,\n \"avg_ts\": 0.990677,\n \"stddev_ts\": 0.177315,\n \"samples_ns\": [ 156503115530, 109197845764, 130350064124 ],\n \"samples_ts\": [ 0.817875, 1.17218, 0.981971 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T16:53:37Z\",\n \"avg_ns\": 158263813611,\n \"stddev_ns\": 4165922581,\n \"avg_ts\": 0.810972,\n \"stddev_ts\": 0.052053,\n \"samples_ns\": [ 159556187072, 167573157367, 147662096395 ],\n \"samples_ts\": [ 0.802225, 0.763845, 0.866844 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T16:45:25Z", "avg_ns": 132017008472, "stddev_ns": 4165598947, "avg_ts": 0.990677, "stddev_ts": 0.177315, "samples_ns": [ 156503115530, 109197845764, 130350064124 ], "samples_ts": [ 0.817875, 1.17218, 0.981971 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T16:53:37Z", "avg_ns": 158263813611, "stddev_ns": 4165922581, "avg_ts": 0.810972, "stddev_ts": 0.052053, "samples_ns": [ 159556187072, 167573157367, 147662096395 ], "samples_ts": [ 0.802225, 0.763845, 0.866844 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 660 }, { "timestamp_utc": "2025-12-11T17:43:04.187827+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T17:01:34Z\",\n \"avg_ns\": 133645263819,\n \"stddev_ns\": 4123240685,\n \"avg_ts\": 0.971518,\n \"stddev_ts\": 0.139150,\n \"samples_ns\": [ 129255037036, 116328744816, 155352009606 ],\n \"samples_ts\": [ 0.99029, 1.10033, 0.823935 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T17:10:33Z\",\n \"avg_ns\": 649913717192,\n \"stddev_ns\": 1563606119,\n \"avg_ts\": 0.787846,\n \"stddev_ts\": 0.007580,\n \"samples_ns\": [ 648473588815, 644487003828, 656780558934 ],\n \"samples_ts\": [ 0.789546, 0.79443, 0.77956 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T17:01:34Z", "avg_ns": 133645263819, "stddev_ns": 4123240685, "avg_ts": 0.971518, "stddev_ts": 0.13915, "samples_ns": [ 129255037036, 116328744816, 155352009606 ], "samples_ts": [ 0.99029, 1.10033, 0.823935 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T17:10:33Z", "avg_ns": 649913717192, "stddev_ns": 1563606119, "avg_ts": 0.787846, "stddev_ts": 0.00758, "samples_ns": [ 648473588815, 644487003828, 656780558934 ], "samples_ts": [ 0.789546, 0.79443, 0.77956 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 661 }, { "timestamp_utc": "2025-12-11T18:25:44.897029+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T17:43:06Z\",\n \"avg_ns\": 509829805709,\n \"stddev_ns\": 3534471142,\n \"avg_ts\": 1.005313,\n \"stddev_ts\": 0.039500,\n \"samples_ns\": [ 533215449962, 500992607513, 495281359652 ],\n \"samples_ts\": [ 0.960212, 1.02197, 1.03376 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:17:08Z\",\n \"avg_ns\": 171703742600,\n \"stddev_ns\": 1542966209,\n \"avg_ts\": 0.772387,\n \"stddev_ts\": 0.188233,\n \"samples_ns\": [ 196890048829, 129404463554, 188816715418 ],\n \"samples_ts\": [ 0.650109, 0.989147, 0.677906 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T17:43:06Z", "avg_ns": 509829805709, "stddev_ns": 3534471142, "avg_ts": 1.005313, "stddev_ts": 0.0395, "samples_ns": [ 533215449962, 500992607513, 495281359652 ], "samples_ts": [ 0.960212, 1.02197, 1.03376 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T18:17:08Z", "avg_ns": 171703742600, "stddev_ns": 1542966209, "avg_ts": 0.772387, "stddev_ts": 0.188233, "samples_ns": [ 196890048829, 129404463554, 188816715418 ], "samples_ts": [ 0.650109, 0.989147, 0.677906 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 662 }, { "timestamp_utc": "2025-12-11T19:31:03.653562+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:25:46Z\",\n \"avg_ns\": 515277420197,\n \"stddev_ns\": 4272980258,\n \"avg_ts\": 0.995759,\n \"stddev_ts\": 0.057184,\n \"samples_ns\": [ 482224373113, 532950148933, 530657738545 ],\n \"samples_ts\": [ 1.06175, 0.96069, 0.96484 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:00:03Z\",\n \"avg_ns\": 619530525395,\n \"stddev_ns\": 1597316358,\n \"avg_ts\": 0.826436,\n \"stddev_ts\": 0.002129,\n \"samples_ns\": [ 621299774329, 619097273248, 618194528610 ],\n \"samples_ts\": [ 0.824079, 0.827011, 0.828218 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T18:25:46Z", "avg_ns": 515277420197, "stddev_ns": 4272980258, "avg_ts": 0.995759, "stddev_ts": 0.057184, "samples_ns": [ 482224373113, 532950148933, 530657738545 ], "samples_ts": [ 1.06175, 0.96069, 0.96484 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T19:00:03Z", "avg_ns": 619530525395, "stddev_ns": 1597316358, "avg_ts": 0.826436, "stddev_ts": 0.002129, "samples_ns": [ 621299774329, 619097273248, 618194528610 ], "samples_ts": [ 0.824079, 0.827011, 0.828218 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 663 }, { "timestamp_utc": "2025-12-11T19:47:12.458216+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:31:04Z\",\n \"avg_ns\": 136537663098,\n \"stddev_ns\": 4158418861,\n \"avg_ts\": 0.990096,\n \"stddev_ts\": 0.302535,\n \"samples_ns\": [ 152981543816, 95622143329, 161009302149 ],\n \"samples_ts\": [ 0.836702, 1.3386, 0.794985 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:39:47Z\",\n \"avg_ns\": 147655052480,\n \"stddev_ns\": 3521416996,\n \"avg_ts\": 0.907500,\n \"stddev_ts\": 0.219740,\n \"samples_ns\": [ 128503290938, 194737442534, 119724423969 ],\n \"samples_ts\": [ 0.996083, 0.657295, 1.06912 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T19:31:04Z", "avg_ns": 136537663098, "stddev_ns": 4158418861, "avg_ts": 0.990096, "stddev_ts": 0.302535, "samples_ns": [ 152981543816, 95622143329, 161009302149 ], "samples_ts": [ 0.836702, 1.3386, 0.794985 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T19:39:47Z", "avg_ns": 147655052480, "stddev_ns": 3521416996, "avg_ts": 0.9075, "stddev_ts": 0.21974, "samples_ns": [ 128503290938, 194737442534, 119724423969 ], "samples_ts": [ 0.996083, 0.657295, 1.06912 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 664 }, { "timestamp_utc": "2025-12-11T20:28:02.825052+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:47:13Z\",\n \"avg_ns\": 121744043787,\n \"stddev_ns\": 1034136466,\n \"avg_ts\": 1.072640,\n \"stddev_ts\": 0.190873,\n \"samples_ns\": [ 99597682324, 139765259149, 125869189889 ],\n \"samples_ts\": [ 1.28517, 0.915821, 1.01693 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:56:05Z\",\n \"avg_ns\": 638597984215,\n \"stddev_ns\": 3852317935,\n \"avg_ts\": 0.801919,\n \"stddev_ts\": 0.013929,\n \"samples_ns\": [ 631658751886, 632612120049, 651523080711 ],\n \"samples_ts\": [ 0.810564, 0.809343, 0.785851 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T19:47:13Z", "avg_ns": 121744043787, "stddev_ns": 1034136466, "avg_ts": 1.07264, "stddev_ts": 0.190873, "samples_ns": [ 99597682324, 139765259149, 125869189889 ], "samples_ts": [ 1.28517, 0.915821, 1.01693 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T19:56:05Z", "avg_ns": 638597984215, "stddev_ns": 3852317935, "avg_ts": 0.801919, "stddev_ts": 0.013929, "samples_ns": [ 631658751886, 632612120049, 651523080711 ], "samples_ts": [ 0.810564, 0.809343, 0.785851 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 665 }, { "timestamp_utc": "2025-12-11T21:10:40.080125+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T20:28:04Z\",\n \"avg_ns\": 508599877268,\n \"stddev_ns\": 4284673799,\n \"avg_ts\": 1.007927,\n \"stddev_ts\": 0.042870,\n \"samples_ns\": [ 533794096001, 499564917503, 492440618301 ],\n \"samples_ts\": [ 0.959171, 1.02489, 1.03972 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:02:03Z\",\n \"avg_ns\": 171639398009,\n \"stddev_ns\": 891310525,\n \"avg_ts\": 0.778833,\n \"stddev_ts\": 0.211376,\n \"samples_ns\": [ 196537570595, 125143745602, 193236877831 ],\n \"samples_ts\": [ 0.651275, 1.02282, 0.662399 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T20:28:04Z", "avg_ns": 508599877268, "stddev_ns": 4284673799, "avg_ts": 1.007927, "stddev_ts": 0.04287, "samples_ns": [ 533794096001, 499564917503, 492440618301 ], "samples_ts": [ 0.959171, 1.02489, 1.03972 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T21:02:03Z", "avg_ns": 171639398009, "stddev_ns": 891310525, "avg_ts": 0.778833, "stddev_ts": 0.211376, "samples_ns": [ 196537570595, 125143745602, 193236877831 ], "samples_ts": [ 0.651275, 1.02282, 0.662399 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 666 }, { "timestamp_utc": "2025-12-11T22:17:44.546004+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:10:41Z\",\n \"avg_ns\": 516447022332,\n \"stddev_ns\": 605810764,\n \"avg_ts\": 0.993181,\n \"stddev_ts\": 0.052421,\n \"samples_ns\": [ 486012343833, 534241382909, 529087340256 ],\n \"samples_ts\": [ 1.05347, 0.958368, 0.967704 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:44:58Z\",\n \"avg_ns\": 654540343903,\n \"stddev_ns\": 3879268703,\n \"avg_ts\": 0.782325,\n \"stddev_ts\": 0.010632,\n \"samples_ns\": [ 646391237885, 653155374778, 664074419048 ],\n \"samples_ts\": [ 0.79209, 0.783887, 0.770998 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T21:10:41Z", "avg_ns": 516447022332, "stddev_ns": 605810764, "avg_ts": 0.993181, "stddev_ts": 0.052421, "samples_ns": [ 486012343833, 534241382909, 529087340256 ], "samples_ts": [ 1.05347, 0.958368, 0.967704 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T21:44:58Z", "avg_ns": 654540343903, "stddev_ns": 3879268703, "avg_ts": 0.782325, "stddev_ts": 0.010632, "samples_ns": [ 646391237885, 653155374778, 664074419048 ], "samples_ts": [ 0.79209, 0.783887, 0.770998 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 667 }, { "timestamp_utc": "2025-12-11T22:34:59.724816+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:17:46Z\",\n \"avg_ns\": 120850213179,\n \"stddev_ns\": 1851761473,\n \"avg_ts\": 1.106124,\n \"stddev_ts\": 0.265855,\n \"samples_ns\": [ 95917024374, 157195642576, 109437972587 ],\n \"samples_ts\": [ 1.33449, 0.814272, 1.16961 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:26:27Z\",\n \"avg_ns\": 170193251282,\n \"stddev_ns\": 950050981,\n \"avg_ts\": 0.765818,\n \"stddev_ts\": 0.130785,\n \"samples_ns\": [ 179573364849, 139923207836, 191083181162 ],\n \"samples_ts\": [ 0.712801, 0.914787, 0.669865 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T22:17:46Z", "avg_ns": 120850213179, "stddev_ns": 1851761473, "avg_ts": 1.106124, "stddev_ts": 0.265855, "samples_ns": [ 95917024374, 157195642576, 109437972587 ], "samples_ts": [ 1.33449, 0.814272, 1.16961 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T22:26:27Z", "avg_ns": 170193251282, "stddev_ns": 950050981, "avg_ts": 0.765818, "stddev_ts": 0.130785, "samples_ns": [ 179573364849, 139923207836, 191083181162 ], "samples_ts": [ 0.712801, 0.914787, 0.669865 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 668 }, { "timestamp_utc": "2025-12-11T23:15:55.064342+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:35:01Z\",\n \"avg_ns\": 126768376177,\n \"stddev_ns\": 548898757,\n \"avg_ts\": 1.017004,\n \"stddev_ts\": 0.104786,\n \"samples_ns\": [ 140773219253, 125099212203, 114432697076 ],\n \"samples_ts\": [ 0.909264, 1.02319, 1.11856 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:43:01Z\",\n \"avg_ns\": 657198611192,\n \"stddev_ns\": 3778619039,\n \"avg_ts\": 0.779204,\n \"stddev_ts\": 0.012836,\n \"samples_ns\": [ 666180317565, 660139565765, 645275950247 ],\n \"samples_ts\": [ 0.768561, 0.775594, 0.793459 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T22:35:01Z", "avg_ns": 126768376177, "stddev_ns": 548898757, "avg_ts": 1.017004, "stddev_ts": 0.104786, "samples_ns": [ 140773219253, 125099212203, 114432697076 ], "samples_ts": [ 0.909264, 1.02319, 1.11856 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-11T22:43:01Z", "avg_ns": 657198611192, "stddev_ns": 3778619039, "avg_ts": 0.779204, "stddev_ts": 0.012836, "samples_ns": [ 666180317565, 660139565765, 645275950247 ], "samples_ts": [ 0.768561, 0.775594, 0.793459 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 669 }, { "timestamp_utc": "2025-12-11T23:58:24.880887+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:15:56Z\",\n \"avg_ns\": 517536368062,\n \"stddev_ns\": 1312013597,\n \"avg_ts\": 0.990763,\n \"stddev_ts\": 0.046986,\n \"samples_ns\": [ 491042162842, 523277697206, 538289244138 ],\n \"samples_ts\": [ 1.04268, 0.978448, 0.951161 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:50:35Z\",\n \"avg_ns\": 155884800607,\n \"stddev_ns\": 4180179800,\n \"avg_ts\": 0.832168,\n \"stddev_ts\": 0.115090,\n \"samples_ns\": [ 136737087836, 180668405266, 150248908721 ],\n \"samples_ts\": [ 0.936103, 0.70848, 0.85192 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T23:15:56Z", "avg_ns": 517536368062, "stddev_ns": 1312013597, "avg_ts": 0.990763, "stddev_ts": 0.046986, "samples_ns": [ 491042162842, 523277697206, 538289244138 ], "samples_ts": [ 1.04268, 0.978448, 0.951161 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-11T23:50:35Z", "avg_ns": 155884800607, "stddev_ns": 4180179800, "avg_ts": 0.832168, "stddev_ts": 0.11509, "samples_ns": [ 136737087836, 180668405266, 150248908721 ], "samples_ts": [ 0.936103, 0.70848, 0.85192 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 670 }, { "timestamp_utc": "2025-12-12T01:06:48.037318+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:58:26Z\",\n \"avg_ns\": 517095396344,\n \"stddev_ns\": 272528241,\n \"avg_ts\": 0.991631,\n \"stddev_ts\": 0.047414,\n \"samples_ns\": [ 537760033568, 523197786668, 490328368797 ],\n \"samples_ts\": [ 0.952098, 0.978597, 1.0442 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:32:47Z\",\n \"avg_ns\": 679642554211,\n \"stddev_ns\": 1833512865,\n \"avg_ts\": 0.753928,\n \"stddev_ts\": 0.025752,\n \"samples_ns\": [ 704764967292, 675694650697, 658468044646 ],\n \"samples_ts\": [ 0.726483, 0.757739, 0.777562 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-11T23:58:26Z", "avg_ns": 517095396344, "stddev_ns": 272528241, "avg_ts": 0.991631, "stddev_ts": 0.047414, "samples_ns": [ 537760033568, 523197786668, 490328368797 ], "samples_ts": [ 0.952098, 0.978597, 1.0442 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-12T00:32:47Z", "avg_ns": 679642554211, "stddev_ns": 1833512865, "avg_ts": 0.753928, "stddev_ts": 0.025752, "samples_ns": [ 704764967292, 675694650697, 658468044646 ], "samples_ts": [ 0.726483, 0.757739, 0.777562 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 671 }, { "timestamp_utc": "2025-12-12T01:24:02.303179+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T01:06:49Z\",\n \"avg_ns\": 128736984643,\n \"stddev_ns\": 4124047376,\n \"avg_ts\": 1.001925,\n \"stddev_ts\": 0.104091,\n \"samples_ns\": [ 119391421430, 121749125470, 145070407031 ],\n \"samples_ts\": [ 1.0721, 1.05134, 0.88233 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T01:15:42Z\",\n \"avg_ns\": 166028785437,\n \"stddev_ns\": 3539712165,\n \"avg_ts\": 0.774801,\n \"stddev_ts\": 0.068350,\n \"samples_ns\": [ 150128983929, 176697920786, 171259451597 ],\n \"samples_ts\": [ 0.8526, 0.7244, 0.747404 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T01:06:49Z", "avg_ns": 128736984643, "stddev_ns": 4124047376, "avg_ts": 1.001925, "stddev_ts": 0.104091, "samples_ns": [ 119391421430, 121749125470, 145070407031 ], "samples_ts": [ 1.0721, 1.05134, 0.88233 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-12T01:15:42Z", "avg_ns": 166028785437, "stddev_ns": 3539712165, "avg_ts": 0.774801, "stddev_ts": 0.06835, "samples_ns": [ 150128983929, 176697920786, 171259451597 ], "samples_ts": [ 0.8526, 0.7244, 0.747404 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 672 }, { "timestamp_utc": "2025-12-12T02:07:36.604487+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T01:24:04Z\",\n \"avg_ns\": 120953801111,\n \"stddev_ns\": 4162853544,\n \"avg_ts\": 1.093628,\n \"stddev_ts\": 0.238275,\n \"samples_ns\": [ 116584911316, 150016433633, 96260058384 ],\n \"samples_ts\": [ 1.09791, 0.85324, 1.32973 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T01:32:12Z\",\n \"avg_ns\": 707653692988,\n \"stddev_ns\": 3948731739,\n \"avg_ts\": 0.724239,\n \"stddev_ts\": 0.027727,\n \"samples_ns\": [ 739067531014, 696696900428, 687196647524 ],\n \"samples_ts\": [ 0.692765, 0.734896, 0.745056 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T01:24:04Z", "avg_ns": 120953801111, "stddev_ns": 4162853544, "avg_ts": 1.093628, "stddev_ts": 0.238275, "samples_ns": [ 116584911316, 150016433633, 96260058384 ], "samples_ts": [ 1.09791, 0.85324, 1.32973 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-12T01:32:12Z", "avg_ns": 707653692988, "stddev_ns": 3948731739, "avg_ts": 0.724239, "stddev_ts": 0.027727, "samples_ns": [ 739067531014, 696696900428, 687196647524 ], "samples_ts": [ 0.692765, 0.734896, 0.745056 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 673 }, { "timestamp_utc": "2025-12-12T02:51:07.825165+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T02:07:38Z\",\n \"avg_ns\": 511538431018,\n \"stddev_ns\": 3960822846,\n \"avg_ts\": 1.002046,\n \"stddev_ts\": 0.041006,\n \"samples_ns\": [ 536115225918, 496927138882, 501572928254 ],\n \"samples_ts\": [ 0.955019, 1.03033, 1.02079 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T02:42:00Z\",\n \"avg_ns\": 182029834398,\n \"stddev_ns\": 1437742509,\n \"avg_ts\": 0.717382,\n \"stddev_ts\": 0.128450,\n \"samples_ns\": [ 207049878900, 148408702166, 190630922130 ],\n \"samples_ts\": [ 0.618209, 0.862483, 0.671455 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T02:07:38Z", "avg_ns": 511538431018, "stddev_ns": 3960822846, "avg_ts": 1.002046, "stddev_ts": 0.041006, "samples_ns": [ 536115225918, 496927138882, 501572928254 ], "samples_ts": [ 0.955019, 1.03033, 1.02079 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-12T02:42:00Z", "avg_ns": 182029834398, "stddev_ns": 1437742509, "avg_ts": 0.717382, "stddev_ts": 0.12845, "samples_ns": [ 207049878900, 148408702166, 190630922130 ], "samples_ts": [ 0.618209, 0.862483, 0.671455 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 674 }, { "timestamp_utc": "2025-12-12T04:01:09.202591+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T02:51:08Z\",\n \"avg_ns\": 511891066004,\n \"stddev_ns\": 2169180092,\n \"avg_ts\": 1.001817,\n \"stddev_ts\": 0.049373,\n \"samples_ns\": [ 485402853656, 515314162931, 534956181427 ],\n \"samples_ts\": [ 1.05479, 0.993569, 0.957088 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T03:25:36Z\",\n \"avg_ns\": 710510124922,\n \"stddev_ns\": 4175543284,\n \"avg_ts\": 0.721321,\n \"stddev_ts\": 0.027634,\n \"samples_ns\": [ 685892313582, 740161967125, 705476094060 ],\n \"samples_ts\": [ 0.746473, 0.69174, 0.725751 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T02:51:08Z", "avg_ns": 511891066004, "stddev_ns": 2169180092, "avg_ts": 1.001817, "stddev_ts": 0.049373, "samples_ns": [ 485402853656, 515314162931, 534956181427 ], "samples_ts": [ 1.05479, 0.993569, 0.957088 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-12T03:25:36Z", "avg_ns": 710510124922, "stddev_ns": 4175543284, "avg_ts": 0.721321, "stddev_ts": 0.027634, "samples_ns": [ 685892313582, 740161967125, 705476094060 ], "samples_ts": [ 0.746473, 0.69174, 0.725751 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 675 }, { "timestamp_utc": "2025-12-12T04:17:56.978822+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T04:01:10Z\",\n \"avg_ns\": 136994229024,\n \"stddev_ns\": 1664880068,\n \"avg_ts\": 0.986924,\n \"stddev_ts\": 0.298612,\n \"samples_ns\": [ 147824245606, 96454484686, 166703956781 ],\n \"samples_ts\": [ 0.865893, 1.32705, 0.767828 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T04:10:02Z\",\n \"avg_ns\": 157465047232,\n \"stddev_ns\": 1492571004,\n \"avg_ts\": 0.843126,\n \"stddev_ts\": 0.185907,\n \"samples_ns\": [ 128572455035, 201255921247, 142566765415 ],\n \"samples_ts\": [ 0.995548, 0.636006, 0.897825 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T04:01:10Z", "avg_ns": 136994229024, "stddev_ns": 1664880068, "avg_ts": 0.986924, "stddev_ts": 0.298612, "samples_ns": [ 147824245606, 96454484686, 166703956781 ], "samples_ts": [ 0.865893, 1.32705, 0.767828 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-12T04:10:02Z", "avg_ns": 157465047232, "stddev_ns": 1492571004, "avg_ts": 0.843126, "stddev_ts": 0.185907, "samples_ns": [ 128572455035, 201255921247, 142566765415 ], "samples_ts": [ 0.995548, 0.636006, 0.897825 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 676 }, { "timestamp_utc": "2025-12-12T05:01:58.747371+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T04:17:58Z\",\n \"avg_ns\": 120861173607,\n \"stddev_ns\": 3621484899,\n \"avg_ts\": 1.115367,\n \"stddev_ts\": 0.286439,\n \"samples_ns\": [ 95957907354, 161801183647, 104824429822 ],\n \"samples_ts\": [ 1.33392, 0.791094, 1.22109 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T04:26:34Z\",\n \"avg_ns\": 707612874408,\n \"stddev_ns\": 4011783685,\n \"avg_ts\": 0.724096,\n \"stddev_ts\": 0.024240,\n \"samples_ns\": [ 711807114850, 728707482140, 682324026236 ],\n \"samples_ts\": [ 0.719296, 0.702614, 0.750377 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T04:17:58Z", "avg_ns": 120861173607, "stddev_ns": 3621484899, "avg_ts": 1.115367, "stddev_ts": 0.286439, "samples_ns": [ 95957907354, 161801183647, 104824429822 ], "samples_ts": [ 1.33392, 0.791094, 1.22109 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-12T04:26:34Z", "avg_ns": 707612874408, "stddev_ns": 4011783685, "avg_ts": 0.724096, "stddev_ts": 0.02424, "samples_ns": [ 711807114850, 728707482140, 682324026236 ], "samples_ts": [ 0.719296, 0.702614, 0.750377 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 677 }, { "timestamp_utc": "2025-12-12T05:45:14.845435+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T05:01:59Z\",\n \"avg_ns\": 510926114847,\n \"stddev_ns\": 659437316,\n \"avg_ts\": 1.003411,\n \"stddev_ts\": 0.044944,\n \"samples_ns\": [ 521757956180, 485249385793, 525771002570 ],\n \"samples_ts\": [ 0.981298, 1.05513, 0.973808 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T05:36:28Z\",\n \"avg_ns\": 175129717912,\n \"stddev_ns\": 3916820766,\n \"avg_ts\": 0.747180,\n \"stddev_ts\": 0.140790,\n \"samples_ns\": [ 184013228532, 141204615681, 200171309524 ],\n \"samples_ts\": [ 0.695602, 0.906486, 0.639452 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T05:01:59Z", "avg_ns": 510926114847, "stddev_ns": 659437316, "avg_ts": 1.003411, "stddev_ts": 0.044944, "samples_ns": [ 521757956180, 485249385793, 525771002570 ], "samples_ts": [ 0.981298, 1.05513, 0.973808 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-12T05:36:28Z", "avg_ns": 175129717912, "stddev_ns": 3916820766, "avg_ts": 0.74718, "stddev_ts": 0.14079, "samples_ns": [ 184013228532, 141204615681, 200171309524 ], "samples_ts": [ 0.695602, 0.906486, 0.639452 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 678 }, { "timestamp_utc": "2025-12-12T06:53:46.889766+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T05:45:15Z\",\n \"avg_ns\": 531768462785,\n \"stddev_ns\": 2066691201,\n \"avg_ts\": 0.964380,\n \"stddev_ts\": 0.047349,\n \"samples_ns\": [ 506486469187, 558811415788, 530007503381 ],\n \"samples_ts\": [ 1.01089, 0.91623, 0.966024 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T06:20:00Z\",\n \"avg_ns\": 674797894294,\n \"stddev_ns\": 2395400130,\n \"avg_ts\": 0.759325,\n \"stddev_ts\": 0.025440,\n \"samples_ns\": [ 661487146945, 661488595378, 701417940559 ],\n \"samples_ts\": [ 0.774014, 0.774012, 0.72995 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T05:45:15Z", "avg_ns": 531768462785, "stddev_ns": 2066691201, "avg_ts": 0.96438, "stddev_ts": 0.047349, "samples_ns": [ 506486469187, 558811415788, 530007503381 ], "samples_ts": [ 1.01089, 0.91623, 0.966024 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-12T06:20:00Z", "avg_ns": 674797894294, "stddev_ns": 2395400130, "avg_ts": 0.759325, "stddev_ts": 0.02544, "samples_ns": [ 661487146945, 661488595378, 701417940559 ], "samples_ts": [ 0.774014, 0.774012, 0.72995 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 679 }, { "timestamp_utc": "2025-12-12T07:10:47.418741+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T06:53:48Z\",\n \"avg_ns\": 120981850861,\n \"stddev_ns\": 2112998401,\n \"avg_ts\": 1.100027,\n \"stddev_ts\": 0.253261,\n \"samples_ns\": [ 111599222153, 154854551450, 96491778982 ],\n \"samples_ts\": [ 1.14696, 0.826582, 1.32654 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T07:02:01Z\",\n \"avg_ns\": 174879607259,\n \"stddev_ns\": 3959774283,\n \"avg_ts\": 0.752647,\n \"stddev_ts\": 0.161131,\n \"samples_ns\": [ 200571834404, 136619652994, 187447334381 ],\n \"samples_ts\": [ 0.638175, 0.936908, 0.682858 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T06:53:48Z", "avg_ns": 120981850861, "stddev_ns": 2112998401, "avg_ts": 1.100027, "stddev_ts": 0.253261, "samples_ns": [ 111599222153, 154854551450, 96491778982 ], "samples_ts": [ 1.14696, 0.826582, 1.32654 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-12T07:02:01Z", "avg_ns": 174879607259, "stddev_ns": 3959774283, "avg_ts": 0.752647, "stddev_ts": 0.161131, "samples_ns": [ 200571834404, 136619652994, 187447334381 ], "samples_ts": [ 0.638175, 0.936908, 0.682858 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 680 }, { "timestamp_utc": "2025-12-12T07:54:01.769980+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T07:10:48Z\",\n \"avg_ns\": 136974587914,\n \"stddev_ns\": 4042742778,\n \"avg_ts\": 0.985817,\n \"stddev_ts\": 0.297435,\n \"samples_ns\": [ 162402068996, 96393637280, 152128057467 ],\n \"samples_ts\": [ 0.788167, 1.32789, 0.841396 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T07:19:23Z\",\n \"avg_ns\": 691970265169,\n \"stddev_ns\": 2651482420,\n \"avg_ts\": 0.740704,\n \"stddev_ts\": 0.029407,\n \"samples_ns\": [ 685997787464, 667646846940, 722266161104 ],\n \"samples_ts\": [ 0.746358, 0.766872, 0.70888 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T07:10:48Z", "avg_ns": 136974587914, "stddev_ns": 4042742778, "avg_ts": 0.985817, "stddev_ts": 0.297435, "samples_ns": [ 162402068996, 96393637280, 152128057467 ], "samples_ts": [ 0.788167, 1.32789, 0.841396 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-12T07:19:23Z", "avg_ns": 691970265169, "stddev_ns": 2651482420, "avg_ts": 0.740704, "stddev_ts": 0.029407, "samples_ns": [ 685997787464, 667646846940, 722266161104 ], "samples_ts": [ 0.746358, 0.766872, 0.70888 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 681 }, { "timestamp_utc": "2025-12-12T08:36:40.048695+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T07:54:03Z\",\n \"avg_ns\": 522426510775,\n \"stddev_ns\": 4199555837,\n \"avg_ts\": 0.981702,\n \"stddev_ts\": 0.050167,\n \"samples_ns\": [ 537057622760, 537737125590, 492484783975 ],\n \"samples_ts\": [ 0.953343, 0.952138, 1.03963 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T08:28:22Z\",\n \"avg_ns\": 165348333527,\n \"stddev_ns\": 554463871,\n \"avg_ts\": 0.777954,\n \"stddev_ts\": 0.068243,\n \"samples_ns\": [ 170165654917, 176228514152, 149650831514 ],\n \"samples_ts\": [ 0.752208, 0.72633, 0.855324 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T07:54:03Z", "avg_ns": 522426510775, "stddev_ns": 4199555837, "avg_ts": 0.981702, "stddev_ts": 0.050167, "samples_ns": [ 537057622760, 537737125590, 492484783975 ], "samples_ts": [ 0.953343, 0.952138, 1.03963 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-12T08:28:22Z", "avg_ns": 165348333527, "stddev_ns": 554463871, "avg_ts": 0.777954, "stddev_ts": 0.068243, "samples_ns": [ 170165654917, 176228514152, 149650831514 ], "samples_ts": [ 0.752208, 0.72633, 0.855324 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 682 }, { "timestamp_utc": "2025-12-12T09:45:44.868279+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "1", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T08:36:41Z\",\n \"avg_ns\": 514144544415,\n \"stddev_ns\": 4144224253,\n \"avg_ts\": 0.996879,\n \"stddev_ts\": 0.039830,\n \"samples_ns\": [ 517350829534, 492390238265, 532692565446 ],\n \"samples_ts\": [ 0.989657, 1.03983, 0.961155 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T09:11:22Z\",\n \"avg_ns\": 687045389834,\n \"stddev_ns\": 3578517202,\n \"avg_ts\": 0.745693,\n \"stddev_ts\": 0.022830,\n \"samples_ns\": [ 670741124028, 711202391679, 679192653796 ],\n \"samples_ts\": [ 0.763335, 0.719908, 0.753836 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T08:36:41Z", "avg_ns": 514144544415, "stddev_ns": 4144224253, "avg_ts": 0.996879, "stddev_ts": 0.03983, "samples_ns": [ 517350829534, 492390238265, 532692565446 ], "samples_ts": [ 0.989657, 1.03983, 0.961155 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 1, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-12T09:11:22Z", "avg_ns": 687045389834, "stddev_ns": 3578517202, "avg_ts": 0.745693, "stddev_ts": 0.02283, "samples_ns": [ 670741124028, 711202391679, 679192653796 ], "samples_ts": [ 0.763335, 0.719908, 0.753836 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 1, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 683 }, { "timestamp_utc": "2025-12-12T10:00:32.141721+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T09:45:45Z\",\n \"avg_ns\": 112835325904,\n \"stddev_ns\": 839118946,\n \"avg_ts\": 1.263564,\n \"stddev_ts\": 0.475573,\n \"samples_ns\": [ 165096514562, 74188096354, 99221366798 ],\n \"samples_ts\": [ 0.775304, 1.72534, 1.29004 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T09:52:22Z\",\n \"avg_ns\": 162459664787,\n \"stddev_ns\": 728280103,\n \"avg_ts\": 0.807432,\n \"stddev_ts\": 0.161860,\n \"samples_ns\": [ 184005173309, 128897205392, 174476615661 ],\n \"samples_ts\": [ 0.695633, 0.993039, 0.733623 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T09:45:45Z", "avg_ns": 112835325904, "stddev_ns": 839118946, "avg_ts": 1.263564, "stddev_ts": 0.475573, "samples_ns": [ 165096514562, 74188096354, 99221366798 ], "samples_ts": [ 0.775304, 1.72534, 1.29004 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-12T09:52:22Z", "avg_ns": 162459664787, "stddev_ns": 728280103, "avg_ts": 0.807432, "stddev_ts": 0.16186, "samples_ns": [ 184005173309, 128897205392, 174476615661 ], "samples_ts": [ 0.695633, 0.993039, 0.733623 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 684 }, { "timestamp_utc": "2025-12-12T10:39:30.484597+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T10:00:33Z\",\n \"avg_ns\": 112680469730,\n \"stddev_ns\": 3789003014,\n \"avg_ts\": 1.276669,\n \"stddev_ts\": 0.511374,\n \"samples_ns\": [ 164920765344, 71181124921, 101939518926 ],\n \"samples_ts\": [ 0.77613, 1.79823, 1.25565 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T10:07:26Z\",\n \"avg_ns\": 640470180530,\n \"stddev_ns\": 2330851669,\n \"avg_ts\": 0.799420,\n \"stddev_ts\": 0.002904,\n \"samples_ns\": [ 643138685738, 639439538649, 638832317203 ],\n \"samples_ts\": [ 0.796096, 0.800701, 0.801462 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T10:00:33Z", "avg_ns": 112680469730, "stddev_ns": 3789003014, "avg_ts": 1.276669, "stddev_ts": 0.511374, "samples_ns": [ 164920765344, 71181124921, 101939518926 ], "samples_ts": [ 0.77613, 1.79823, 1.25565 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-12T10:07:26Z", "avg_ns": 640470180530, "stddev_ns": 2330851669, "avg_ts": 0.79942, "stddev_ts": 0.002904, "samples_ns": [ 643138685738, 639439538649, 638832317203 ], "samples_ts": [ 0.796096, 0.800701, 0.801462 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 685 }, { "timestamp_utc": "2025-12-12T11:15:45.878485+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T10:39:32Z\",\n \"avg_ns\": 443152887286,\n \"stddev_ns\": 2152969265,\n \"avg_ts\": 1.158560,\n \"stddev_ts\": 0.074554,\n \"samples_ns\": [ 472289959109, 415184153628, 441984549122 ],\n \"samples_ts\": [ 1.08408, 1.23319, 1.15841 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T11:08:06Z\",\n \"avg_ns\": 152444009989,\n \"stddev_ns\": 3947046676,\n \"avg_ts\": 0.845189,\n \"stddev_ts\": 0.084038,\n \"samples_ns\": [ 152398069894, 167557213672, 137376746403 ],\n \"samples_ts\": [ 0.839906, 0.763918, 0.931744 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T10:39:32Z", "avg_ns": 443152887286, "stddev_ns": 2152969265, "avg_ts": 1.15856, "stddev_ts": 0.074554, "samples_ns": [ 472289959109, 415184153628, 441984549122 ], "samples_ts": [ 1.08408, 1.23319, 1.15841 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-12T11:08:06Z", "avg_ns": 152444009989, "stddev_ns": 3947046676, "avg_ts": 0.845189, "stddev_ts": 0.084038, "samples_ns": [ 152398069894, 167557213672, 137376746403 ], "samples_ts": [ 0.839906, 0.763918, 0.931744 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 686 }, { "timestamp_utc": "2025-12-12T12:16:04.241376+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T11:15:47Z\",\n \"avg_ns\": 442070233615,\n \"stddev_ns\": 1697160734,\n \"avg_ts\": 1.160264,\n \"stddev_ts\": 0.059923,\n \"samples_ns\": [ 439596218760, 420415714268, 466198767818 ],\n \"samples_ts\": [ 1.16471, 1.21784, 1.09824 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T11:45:22Z\",\n \"avg_ns\": 613760957086,\n \"stddev_ns\": 1785711423,\n \"avg_ts\": 0.834260,\n \"stddev_ts\": 0.008555,\n \"samples_ns\": [ 610240799523, 609972286437, 621069785300 ],\n \"samples_ts\": [ 0.839013, 0.839382, 0.824384 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T11:15:47Z", "avg_ns": 442070233615, "stddev_ns": 1697160734, "avg_ts": 1.160264, "stddev_ts": 0.059923, "samples_ns": [ 439596218760, 420415714268, 466198767818 ], "samples_ts": [ 1.16471, 1.21784, 1.09824 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-12T11:45:22Z", "avg_ns": 613760957086, "stddev_ns": 1785711423, "avg_ts": 0.83426, "stddev_ts": 0.008555, "samples_ns": [ 610240799523, 609972286437, 621069785300 ], "samples_ts": [ 0.839013, 0.839382, 0.824384 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 687 }, { "timestamp_utc": "2025-12-12T12:31:00.305318+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T12:16:05Z\",\n \"avg_ns\": 112522524880,\n \"stddev_ns\": 4175356644,\n \"avg_ts\": 1.334911,\n \"stddev_ts\": 0.668356,\n \"samples_ns\": [ 111011521023, 61675420012, 164880633605 ],\n \"samples_ts\": [ 1.15303, 2.07538, 0.776319 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T12:24:14Z\",\n \"avg_ns\": 134470271602,\n \"stddev_ns\": 3621651075,\n \"avg_ts\": 1.086117,\n \"stddev_ts\": 0.415590,\n \"samples_ns\": [ 100765010271, 209744280071, 92901524464 ],\n \"samples_ts\": [ 1.27028, 0.610267, 1.3778 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T12:16:05Z", "avg_ns": 112522524880, "stddev_ns": 4175356644, "avg_ts": 1.334911, "stddev_ts": 0.668356, "samples_ns": [ 111011521023, 61675420012, 164880633605 ], "samples_ts": [ 1.15303, 2.07538, 0.776319 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-12T12:24:14Z", "avg_ns": 134470271602, "stddev_ns": 3621651075, "avg_ts": 1.086117, "stddev_ts": 0.41559, "samples_ns": [ 100765010271, 209744280071, 92901524464 ], "samples_ts": [ 1.27028, 0.610267, 1.3778 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 688 }, { "timestamp_utc": "2025-12-12T13:10:21.458316+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T12:31:01Z\",\n \"avg_ns\": 113497296436,\n \"stddev_ns\": 4087263520,\n \"avg_ts\": 1.426589,\n \"stddev_ts\": 0.903181,\n \"samples_ns\": [ 120525511378, 52136930121, 167829447811 ],\n \"samples_ts\": [ 1.06202, 2.45507, 0.762679 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T12:39:04Z\",\n \"avg_ns\": 624976989004,\n \"stddev_ns\": 3946718053,\n \"avg_ts\": 0.819548,\n \"stddev_ts\": 0.019761,\n \"samples_ns\": [ 640398134700, 624291751599, 610241080715 ],\n \"samples_ts\": [ 0.799503, 0.820129, 0.839013 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T12:31:01Z", "avg_ns": 113497296436, "stddev_ns": 4087263520, "avg_ts": 1.426589, "stddev_ts": 0.903181, "samples_ns": [ 120525511378, 52136930121, 167829447811 ], "samples_ts": [ 1.06202, 2.45507, 0.762679 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-12T12:39:04Z", "avg_ns": 624976989004, "stddev_ns": 3946718053, "avg_ts": 0.819548, "stddev_ts": 0.019761, "samples_ns": [ 640398134700, 624291751599, 610241080715 ], "samples_ts": [ 0.799503, 0.820129, 0.839013 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 689 }, { "timestamp_utc": "2025-12-12T13:46:06.463186+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T13:10:22Z\",\n \"avg_ns\": 452041950931,\n \"stddev_ns\": 1001094681,\n \"avg_ts\": 1.158818,\n \"stddev_ts\": 0.224959,\n \"samples_ns\": [ 497664147277, 360924632581, 497537072937 ],\n \"samples_ts\": [ 1.02881, 1.41858, 1.02907 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T13:39:27Z\",\n \"avg_ns\": 132378052023,\n \"stddev_ns\": 4191672298,\n \"avg_ts\": 1.189930,\n \"stddev_ts\": 0.551556,\n \"samples_ns\": [ 92455926626, 225562122162, 79116107281 ],\n \"samples_ts\": [ 1.38444, 0.567471, 1.61788 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T13:10:22Z", "avg_ns": 452041950931, "stddev_ns": 1001094681, "avg_ts": 1.158818, "stddev_ts": 0.224959, "samples_ns": [ 497664147277, 360924632581, 497537072937 ], "samples_ts": [ 1.02881, 1.41858, 1.02907 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-12T13:39:27Z", "avg_ns": 132378052023, "stddev_ns": 4191672298, "avg_ts": 1.18993, "stddev_ts": 0.551556, "samples_ns": [ 92455926626, 225562122162, 79116107281 ], "samples_ts": [ 1.38444, 0.567471, 1.61788 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 690 }, { "timestamp_utc": "2025-12-12T14:46:13.883522+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T13:46:07Z\",\n \"avg_ns\": 425569694516,\n \"stddev_ns\": 3566944011,\n \"avg_ts\": 1.208679,\n \"stddev_ts\": 0.100661,\n \"samples_ns\": [ 390707694534, 461611418073, 424389970943 ],\n \"samples_ts\": [ 1.31044, 1.10916, 1.20644 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T14:15:39Z\",\n \"avg_ns\": 611205759493,\n \"stddev_ns\": 1438211357,\n \"avg_ts\": 0.837692,\n \"stddev_ts\": 0.001968,\n \"samples_ns\": [ 610396314221, 610354675348, 612866288910 ],\n \"samples_ts\": [ 0.838799, 0.838857, 0.835419 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T13:46:07Z", "avg_ns": 425569694516, "stddev_ns": 3566944011, "avg_ts": 1.208679, "stddev_ts": 0.100661, "samples_ns": [ 390707694534, 461611418073, 424389970943 ], "samples_ts": [ 1.31044, 1.10916, 1.20644 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-12T14:15:39Z", "avg_ns": 611205759493, "stddev_ns": 1438211357, "avg_ts": 0.837692, "stddev_ts": 0.001968, "samples_ns": [ 610396314221, 610354675348, 612866288910 ], "samples_ts": [ 0.838799, 0.838857, 0.835419 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 691 }, { "timestamp_utc": "2025-12-12T15:01:08.600104+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T14:46:15Z\",\n \"avg_ns\": 114098544412,\n \"stddev_ns\": 1685025125,\n \"avg_ts\": 1.232547,\n \"stddev_ts\": 0.417200,\n \"samples_ns\": [ 94857234665, 81070943036, 166367455537 ],\n \"samples_ts\": [ 1.3494, 1.57886, 0.769381 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T14:54:43Z\",\n \"avg_ns\": 127700922627,\n \"stddev_ns\": 2598200977,\n \"avg_ts\": 1.311468,\n \"stddev_ts\": 0.662199,\n \"samples_ns\": [ 79838506237, 231260305857, 72003955788 ],\n \"samples_ts\": [ 1.60324, 0.553489, 1.77768 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T14:46:15Z", "avg_ns": 114098544412, "stddev_ns": 1685025125, "avg_ts": 1.232547, "stddev_ts": 0.4172, "samples_ns": [ 94857234665, 81070943036, 166367455537 ], "samples_ts": [ 1.3494, 1.57886, 0.769381 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-12T14:54:43Z", "avg_ns": 127700922627, "stddev_ns": 2598200977, "avg_ts": 1.311468, "stddev_ts": 0.662199, "samples_ns": [ 79838506237, 231260305857, 72003955788 ], "samples_ts": [ 1.60324, 0.553489, 1.77768 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 692 }, { "timestamp_utc": "2025-12-12T15:41:07.316888+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T15:01:09Z\",\n \"avg_ns\": 113372320284,\n \"stddev_ns\": 2118000291,\n \"avg_ts\": 1.269549,\n \"stddev_ts\": 0.511031,\n \"samples_ns\": [ 102930659548, 71385548246, 165800753060 ],\n \"samples_ts\": [ 1.24356, 1.79308, 0.772011 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T15:09:29Z\",\n \"avg_ns\": 631794944944,\n \"stddev_ns\": 1127710329,\n \"avg_ts\": 0.810675,\n \"stddev_ts\": 0.018498,\n \"samples_ns\": [ 648663425431, 623352832048, 623368577353 ],\n \"samples_ts\": [ 0.789315, 0.821365, 0.821344 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T15:01:09Z", "avg_ns": 113372320284, "stddev_ns": 2118000291, "avg_ts": 1.269549, "stddev_ts": 0.511031, "samples_ns": [ 102930659548, 71385548246, 165800753060 ], "samples_ts": [ 1.24356, 1.79308, 0.772011 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-12T15:09:29Z", "avg_ns": 631794944944, "stddev_ns": 1127710329, "avg_ts": 0.810675, "stddev_ts": 0.018498, "samples_ns": [ 648663425431, 623352832048, 623368577353 ], "samples_ts": [ 0.789315, 0.821365, 0.821344 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 693 }, { "timestamp_utc": "2025-12-12T16:18:40.327635+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T15:41:08Z\",\n \"avg_ns\": 425407919380,\n \"stddev_ns\": 3706919085,\n \"avg_ts\": 1.211384,\n \"stddev_ts\": 0.117876,\n \"samples_ns\": [ 388022343452, 471574594179, 416626820511 ],\n \"samples_ts\": [ 1.31951, 1.08572, 1.22892 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T16:10:44Z\",\n \"avg_ns\": 158365553423,\n \"stddev_ns\": 3953611026,\n \"avg_ts\": 0.821615,\n \"stddev_ts\": 0.134133,\n \"samples_ns\": [ 170372984857, 131109162978, 173614512434 ],\n \"samples_ts\": [ 0.751293, 0.976286, 0.737266 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T15:41:08Z", "avg_ns": 425407919380, "stddev_ns": 3706919085, "avg_ts": 1.211384, "stddev_ts": 0.117876, "samples_ns": [ 388022343452, 471574594179, 416626820511 ], "samples_ts": [ 1.31951, 1.08572, 1.22892 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-12T16:10:44Z", "avg_ns": 158365553423, "stddev_ns": 3953611026, "avg_ts": 0.821615, "stddev_ts": 0.134133, "samples_ns": [ 170372984857, 131109162978, 173614512434 ], "samples_ts": [ 0.751293, 0.976286, 0.737266 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 694 }, { "timestamp_utc": "2025-12-12T17:19:39.654331+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "512", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T16:18:42Z\",\n \"avg_ns\": 432201643323,\n \"stddev_ns\": 2339400869,\n \"avg_ts\": 1.186529,\n \"stddev_ts\": 0.058700,\n \"samples_ns\": [ 438365013904, 449380286980, 408859629086 ],\n \"samples_ts\": [ 1.16798, 1.13935, 1.25226 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T16:47:19Z\",\n \"avg_ns\": 645714671136,\n \"stddev_ns\": 2104104935,\n \"avg_ts\": 0.792925,\n \"stddev_ts\": 0.002579,\n \"samples_ns\": [ 644796626623, 648121808378, 644225578409 ],\n \"samples_ts\": [ 0.794049, 0.789975, 0.794753 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T16:18:42Z", "avg_ns": 432201643323, "stddev_ns": 2339400869, "avg_ts": 1.186529, "stddev_ts": 0.0587, "samples_ns": [ 438365013904, 449380286980, 408859629086 ], "samples_ts": [ 1.16798, 1.13935, 1.25226 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 512, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-12T16:47:19Z", "avg_ns": 645714671136, "stddev_ns": 2104104935, "avg_ts": 0.792925, "stddev_ts": 0.002579, "samples_ns": [ 644796626623, 648121808378, 644225578409 ], "samples_ts": [ 0.794049, 0.789975, 0.794753 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 512, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 695 }, { "timestamp_utc": "2025-12-12T17:34:45.300793+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T17:19:41Z\",\n \"avg_ns\": 103701817716,\n \"stddev_ns\": 4174022432,\n \"avg_ts\": 1.553188,\n \"stddev_ts\": 0.960636,\n \"samples_ns\": [ 107076363571, 155569443022, 48459646557 ],\n \"samples_ts\": [ 1.19541, 0.822784, 2.64137 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T17:25:58Z\",\n \"avg_ns\": 175231506007,\n \"stddev_ns\": 3435356769,\n \"avg_ts\": 0.870266,\n \"stddev_ts\": 0.491682,\n \"samples_ns\": [ 214121520295, 89021112249, 222551885478 ],\n \"samples_ts\": [ 0.597791, 1.43786, 0.575147 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T17:19:41Z", "avg_ns": 103701817716, "stddev_ns": 4174022432, "avg_ts": 1.553188, "stddev_ts": 0.960636, "samples_ns": [ 107076363571, 155569443022, 48459646557 ], "samples_ts": [ 1.19541, 0.822784, 2.64137 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-12T17:25:58Z", "avg_ns": 175231506007, "stddev_ns": 3435356769, "avg_ts": 0.870266, "stddev_ts": 0.491682, "samples_ns": [ 214121520295, 89021112249, 222551885478 ], "samples_ts": [ 0.597791, 1.43786, 0.575147 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 696 }, { "timestamp_utc": "2025-12-12T18:15:09.633939+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T17:34:47Z\",\n \"avg_ns\": 103795639104,\n \"stddev_ns\": 1479619400,\n \"avg_ts\": 1.543416,\n \"stddev_ts\": 0.958557,\n \"samples_ns\": [ 111488016589, 151348001243, 48550899481 ],\n \"samples_ts\": [ 1.14811, 0.845733, 2.63641 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T17:41:00Z\",\n \"avg_ns\": 682710691695,\n \"stddev_ns\": 2686221414,\n \"avg_ts\": 0.750149,\n \"stddev_ts\": 0.014974,\n \"samples_ns\": [ 667539762513, 693433357846, 687158954728 ],\n \"samples_ts\": [ 0.766996, 0.738355, 0.745097 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T17:34:47Z", "avg_ns": 103795639104, "stddev_ns": 1479619400, "avg_ts": 1.543416, "stddev_ts": 0.958557, "samples_ns": [ 111488016589, 151348001243, 48550899481 ], "samples_ts": [ 1.14811, 0.845733, 2.63641 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-12T17:41:00Z", "avg_ns": 682710691695, "stddev_ns": 2686221414, "avg_ts": 0.750149, "stddev_ts": 0.014974, "samples_ns": [ 667539762513, 693433357846, 687158954728 ], "samples_ts": [ 0.766996, 0.738355, 0.745097 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 697 }, { "timestamp_utc": "2025-12-12T18:50:56.564242+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T18:15:10Z\",\n \"avg_ns\": 452697442879,\n \"stddev_ns\": 740402504,\n \"avg_ts\": 1.146984,\n \"stddev_ts\": 0.171581,\n \"samples_ns\": [ 503276983504, 381647454215, 473167890920 ],\n \"samples_ts\": [ 1.01733, 1.34155, 1.08207 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T18:43:51Z\",\n \"avg_ns\": 140943335197,\n \"stddev_ns\": 1148408994,\n \"avg_ts\": 0.971508,\n \"stddev_ts\": 0.280891,\n \"samples_ns\": [ 119164610929, 195798371272, 107867023392 ],\n \"samples_ts\": [ 1.07414, 0.653734, 1.18665 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T18:15:10Z", "avg_ns": 452697442879, "stddev_ns": 740402504, "avg_ts": 1.146984, "stddev_ts": 0.171581, "samples_ns": [ 503276983504, 381647454215, 473167890920 ], "samples_ts": [ 1.01733, 1.34155, 1.08207 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-12T18:43:51Z", "avg_ns": 140943335197, "stddev_ns": 1148408994, "avg_ts": 0.971508, "stddev_ts": 0.280891, "samples_ns": [ 119164610929, 195798371272, 107867023392 ], "samples_ts": [ 1.07414, 0.653734, 1.18665 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 698 }, { "timestamp_utc": "2025-12-12T19:50:56.042050+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T18:50:57Z\",\n \"avg_ns\": 433817179352,\n \"stddev_ns\": 3690976331,\n \"avg_ts\": 1.181421,\n \"stddev_ts\": 0.046605,\n \"samples_ns\": [ 414669096454, 440918512679, 445863928925 ],\n \"samples_ts\": [ 1.23472, 1.16121, 1.14833 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T19:20:31Z\",\n \"avg_ns\": 607936037427,\n \"stddev_ns\": 523956281,\n \"avg_ts\": 0.842194,\n \"stddev_ts\": 0.000726,\n \"samples_ns\": [ 608006633762, 608421116337, 607380362182 ],\n \"samples_ts\": [ 0.842096, 0.841522, 0.842964 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T18:50:57Z", "avg_ns": 433817179352, "stddev_ns": 3690976331, "avg_ts": 1.181421, "stddev_ts": 0.046605, "samples_ns": [ 414669096454, 440918512679, 445863928925 ], "samples_ts": [ 1.23472, 1.16121, 1.14833 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-12T19:20:31Z", "avg_ns": 607936037427, "stddev_ns": 523956281, "avg_ts": 0.842194, "stddev_ts": 0.000726, "samples_ns": [ 608006633762, 608421116337, 607380362182 ], "samples_ts": [ 0.842096, 0.841522, 0.842964 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 699 }, { "timestamp_utc": "2025-12-12T20:05:53.464204+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T19:50:57Z\",\n \"avg_ns\": 112620680082,\n \"stddev_ns\": 3712453011,\n \"avg_ts\": 1.466233,\n \"stddev_ts\": 0.997309,\n \"samples_ns\": [ 132744088650, 48952210960, 156165740638 ],\n \"samples_ts\": [ 0.964261, 2.6148, 0.819642 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T19:58:45Z\",\n \"avg_ns\": 141986215092,\n \"stddev_ns\": 1095826485,\n \"avg_ts\": 0.944271,\n \"stddev_ts\": 0.229652,\n \"samples_ns\": [ 123000429239, 187646115161, 115312100876 ],\n \"samples_ts\": [ 1.04065, 0.682135, 1.11003 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T19:50:57Z", "avg_ns": 112620680082, "stddev_ns": 3712453011, "avg_ts": 1.466233, "stddev_ts": 0.997309, "samples_ns": [ 132744088650, 48952210960, 156165740638 ], "samples_ts": [ 0.964261, 2.6148, 0.819642 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-12T19:58:45Z", "avg_ns": 141986215092, "stddev_ns": 1095826485, "avg_ts": 0.944271, "stddev_ts": 0.229652, "samples_ns": [ 123000429239, 187646115161, 115312100876 ], "samples_ts": [ 1.04065, 0.682135, 1.11003 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 700 }, { "timestamp_utc": "2025-12-12T20:45:56.110824+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T20:05:54Z\",\n \"avg_ns\": 112629767845,\n \"stddev_ns\": 2292593942,\n \"avg_ts\": 1.471362,\n \"stddev_ts\": 1.014805,\n \"samples_ns\": [ 139026413202, 48439597087, 150423293248 ],\n \"samples_ts\": [ 0.920688, 2.64247, 0.850932 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T20:13:36Z\",\n \"avg_ns\": 645774358168,\n \"stddev_ns\": 3797116729,\n \"avg_ts\": 0.794598,\n \"stddev_ts\": 0.044942,\n \"samples_ns\": [ 689372000544, 623807605713, 624143468248 ],\n \"samples_ts\": [ 0.742705, 0.820766, 0.820324 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T20:05:54Z", "avg_ns": 112629767845, "stddev_ns": 2292593942, "avg_ts": 1.471362, "stddev_ts": 1.014805, "samples_ns": [ 139026413202, 48439597087, 150423293248 ], "samples_ts": [ 0.920688, 2.64247, 0.850932 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-12T20:13:36Z", "avg_ns": 645774358168, "stddev_ns": 3797116729, "avg_ts": 0.794598, "stddev_ts": 0.044942, "samples_ns": [ 689372000544, 623807605713, 624143468248 ], "samples_ts": [ 0.742705, 0.820766, 0.820324 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 701 }, { "timestamp_utc": "2025-12-12T21:23:03.468463+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T20:45:57Z\",\n \"avg_ns\": 434139767515,\n \"stddev_ns\": 2601199422,\n \"avg_ts\": 1.180571,\n \"stddev_ts\": 0.046710,\n \"samples_ns\": [ 416580766717, 435080142337, 450758393491 ],\n \"samples_ts\": [ 1.22905, 1.17679, 1.13586 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T21:15:29Z\",\n \"avg_ns\": 151178279340,\n \"stddev_ns\": 4288946734,\n \"avg_ts\": 0.856581,\n \"stddev_ts\": 0.111725,\n \"samples_ns\": [ 132729621636, 172670939824, 148134276562 ],\n \"samples_ts\": [ 0.964366, 0.741294, 0.864081 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T20:45:57Z", "avg_ns": 434139767515, "stddev_ns": 2601199422, "avg_ts": 1.180571, "stddev_ts": 0.04671, "samples_ns": [ 416580766717, 435080142337, 450758393491 ], "samples_ts": [ 1.22905, 1.17679, 1.13586 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-12T21:15:29Z", "avg_ns": 151178279340, "stddev_ns": 4288946734, "avg_ts": 0.856581, "stddev_ts": 0.111725, "samples_ns": [ 132729621636, 172670939824, 148134276562 ], "samples_ts": [ 0.964366, 0.741294, 0.864081 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 702 }, { "timestamp_utc": "2025-12-12T22:24:15.490788+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T21:23:05Z\",\n \"avg_ns\": 420290636941,\n \"stddev_ns\": 3803427455,\n \"avg_ts\": 1.230745,\n \"stddev_ts\": 0.149697,\n \"samples_ns\": [ 408204033893, 478202618175, 374465258757 ],\n \"samples_ts\": [ 1.25427, 1.07068, 1.36728 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T21:51:32Z\",\n \"avg_ns\": 653561124609,\n \"stddev_ns\": 371032783,\n \"avg_ts\": 0.783400,\n \"stddev_ts\": 0.000445,\n \"samples_ns\": [ 653895887710, 653162192683, 653625293434 ],\n \"samples_ts\": [ 0.782999, 0.783879, 0.783323 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T21:23:05Z", "avg_ns": 420290636941, "stddev_ns": 3803427455, "avg_ts": 1.230745, "stddev_ts": 0.149697, "samples_ns": [ 408204033893, 478202618175, 374465258757 ], "samples_ts": [ 1.25427, 1.07068, 1.36728 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-12T21:51:32Z", "avg_ns": 653561124609, "stddev_ns": 371032783, "avg_ts": 0.7834, "stddev_ts": 0.000445, "samples_ns": [ 653895887710, 653162192683, 653625293434 ], "samples_ts": [ 0.782999, 0.783879, 0.783323 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 703 }, { "timestamp_utc": "2025-12-12T22:39:19.095759+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T22:24:17Z\",\n \"avg_ns\": 103702276000,\n \"stddev_ns\": 3113146215,\n \"avg_ts\": 1.492387,\n \"stddev_ts\": 0.755038,\n \"samples_ns\": [ 90202049124, 164799476253, 56105302625 ],\n \"samples_ts\": [ 1.41904, 0.776701, 2.28142 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T22:30:52Z\",\n \"avg_ns\": 168668601255,\n \"stddev_ns\": 4267847409,\n \"avg_ts\": 0.821607,\n \"stddev_ts\": 0.306034,\n \"samples_ns\": [ 193982103488, 108975562384, 203048137893 ],\n \"samples_ts\": [ 0.659855, 1.17458, 0.630392 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T22:24:17Z", "avg_ns": 103702276000, "stddev_ns": 3113146215, "avg_ts": 1.492387, "stddev_ts": 0.755038, "samples_ns": [ 90202049124, 164799476253, 56105302625 ], "samples_ts": [ 1.41904, 0.776701, 2.28142 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-12T22:30:52Z", "avg_ns": 168668601255, "stddev_ns": 4267847409, "avg_ts": 0.821607, "stddev_ts": 0.306034, "samples_ns": [ 193982103488, 108975562384, 203048137893 ], "samples_ts": [ 0.659855, 1.17458, 0.630392 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 704 }, { "timestamp_utc": "2025-12-12T23:19:40.341063+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T22:39:21Z\",\n \"avg_ns\": 103711286450,\n \"stddev_ns\": 2558955813,\n \"avg_ts\": 1.515705,\n \"stddev_ts\": 0.812855,\n \"samples_ns\": [ 92287298737, 165183281473, 53663279142 ],\n \"samples_ts\": [ 1.38697, 0.774897, 2.38524 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T22:45:53Z\",\n \"avg_ns\": 675409613274,\n \"stddev_ns\": 4071734341,\n \"avg_ts\": 0.759021,\n \"stddev_ts\": 0.033529,\n \"samples_ns\": [ 641835032795, 691081931579, 693311875449 ],\n \"samples_ts\": [ 0.797713, 0.740867, 0.738484 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T22:39:21Z", "avg_ns": 103711286450, "stddev_ns": 2558955813, "avg_ts": 1.515705, "stddev_ts": 0.812855, "samples_ns": [ 92287298737, 165183281473, 53663279142 ], "samples_ts": [ 1.38697, 0.774897, 2.38524 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-12T22:45:53Z", "avg_ns": 675409613274, "stddev_ns": 4071734341, "avg_ts": 0.759021, "stddev_ts": 0.033529, "samples_ns": [ 641835032795, 691081931579, 693311875449 ], "samples_ts": [ 0.797713, 0.740867, 0.738484 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 705 }, { "timestamp_utc": "2025-12-12T23:55:49.023372+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T23:19:42Z\",\n \"avg_ns\": 446092205874,\n \"stddev_ns\": 4164271949,\n \"avg_ts\": 1.152923,\n \"stddev_ts\": 0.095531,\n \"samples_ns\": [ 479918134866, 407738823613, 450619659143 ],\n \"samples_ts\": [ 1.06685, 1.25571, 1.13621 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T23:48:19Z\",\n \"avg_ns\": 149178651796,\n \"stddev_ns\": 2641029653,\n \"avg_ts\": 0.868486,\n \"stddev_ts\": 0.114092,\n \"samples_ns\": [ 143351070882, 172006162515, 132178721992 ],\n \"samples_ts\": [ 0.892913, 0.744159, 0.968386 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T23:19:42Z", "avg_ns": 446092205874, "stddev_ns": 4164271949, "avg_ts": 1.152923, "stddev_ts": 0.095531, "samples_ns": [ 479918134866, 407738823613, 450619659143 ], "samples_ts": [ 1.06685, 1.25571, 1.13621 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-12T23:48:19Z", "avg_ns": 149178651796, "stddev_ns": 2641029653, "avg_ts": 0.868486, "stddev_ts": 0.114092, "samples_ns": [ 143351070882, 172006162515, 132178721992 ], "samples_ts": [ 0.892913, 0.744159, 0.968386 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 706 }, { "timestamp_utc": "2025-12-13T00:55:56.835003+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "1024", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T23:55:50Z\",\n \"avg_ns\": 440201316942,\n \"stddev_ns\": 1745127691,\n \"avg_ts\": 1.164383,\n \"stddev_ts\": 0.046764,\n \"samples_ns\": [ 432644636089, 427155985880, 460803328858 ],\n \"samples_ts\": [ 1.18342, 1.19863, 1.1111 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T00:25:26Z\",\n \"avg_ns\": 609777167990,\n \"stddev_ns\": 50513467,\n \"avg_ts\": 0.839651,\n \"stddev_ts\": 0.000070,\n \"samples_ns\": [ 609804417527, 609808205369, 609718881074 ],\n \"samples_ts\": [ 0.839613, 0.839608, 0.839731 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-12T23:55:50Z", "avg_ns": 440201316942, "stddev_ns": 1745127691, "avg_ts": 1.164383, "stddev_ts": 0.046764, "samples_ns": [ 432644636089, 427155985880, 460803328858 ], "samples_ts": [ 1.18342, 1.19863, 1.1111 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 1024, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-13T00:25:26Z", "avg_ns": 609777167990, "stddev_ns": 50513467, "avg_ts": 0.839651, "stddev_ts": 7e-05, "samples_ns": [ 609804417527, 609808205369, 609718881074 ], "samples_ts": [ 0.839613, 0.839608, 0.839731 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 1024, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 707 }, { "timestamp_utc": "2025-12-13T01:10:54.722613+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T00:55:57Z\",\n \"avg_ns\": 112655842151,\n \"stddev_ns\": 3407710006,\n \"avg_ts\": 1.467220,\n \"stddev_ts\": 0.999554,\n \"samples_ns\": [ 132543613668, 48887860968, 156536051817 ],\n \"samples_ts\": [ 0.96572, 2.61824, 0.817703 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T01:03:46Z\",\n \"avg_ns\": 142098671309,\n \"stddev_ns\": 4214984565,\n \"avg_ts\": 0.946484,\n \"stddev_ts\": 0.237667,\n \"samples_ns\": [ 123179722783, 189224463063, 113891828083 ],\n \"samples_ts\": [ 1.03913, 0.676445, 1.12387 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-13T00:55:57Z", "avg_ns": 112655842151, "stddev_ns": 3407710006, "avg_ts": 1.46722, "stddev_ts": 0.999554, "samples_ns": [ 132543613668, 48887860968, 156536051817 ], "samples_ts": [ 0.96572, 2.61824, 0.817703 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-13T01:03:46Z", "avg_ns": 142098671309, "stddev_ns": 4214984565, "avg_ts": 0.946484, "stddev_ts": 0.237667, "samples_ns": [ 123179722783, 189224463063, 113891828083 ], "samples_ts": [ 1.03913, 0.676445, 1.12387 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 708 }, { "timestamp_utc": "2025-12-13T01:50:58.193779+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T01:10:55Z\",\n \"avg_ns\": 112593300231,\n \"stddev_ns\": 1173111266,\n \"avg_ts\": 1.472165,\n \"stddev_ts\": 1.014676,\n \"samples_ns\": [ 137597103017, 48434549659, 151748248018 ],\n \"samples_ts\": [ 0.930252, 2.64274, 0.843502 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T01:18:38Z\",\n \"avg_ns\": 645651884581,\n \"stddev_ns\": 1585734823,\n \"avg_ts\": 0.794713,\n \"stddev_ts\": 0.044491,\n \"samples_ns\": [ 688783367749, 624096226096, 624076059899 ],\n \"samples_ts\": [ 0.74334, 0.820386, 0.820413 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-13T01:10:55Z", "avg_ns": 112593300231, "stddev_ns": 1173111266, "avg_ts": 1.472165, "stddev_ts": 1.014676, "samples_ns": [ 137597103017, 48434549659, 151748248018 ], "samples_ts": [ 0.930252, 2.64274, 0.843502 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-13T01:18:38Z", "avg_ns": 645651884581, "stddev_ns": 1585734823, "avg_ts": 0.794713, "stddev_ts": 0.044491, "samples_ns": [ 688783367749, 624096226096, 624076059899 ], "samples_ts": [ 0.74334, 0.820386, 0.820413 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 709 }, { "timestamp_utc": "2025-12-13T02:28:02.321099+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T01:50:59Z\",\n \"avg_ns\": 433107466396,\n \"stddev_ns\": 1180695900,\n \"avg_ts\": 1.183506,\n \"stddev_ts\": 0.049465,\n \"samples_ns\": [ 413048414362, 439501814943, 446772169884 ],\n \"samples_ts\": [ 1.23956, 1.16496, 1.146 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T02:20:31Z\",\n \"avg_ns\": 149898500415,\n \"stddev_ns\": 2434813739,\n \"avg_ts\": 0.860653,\n \"stddev_ts\": 0.091804,\n \"samples_ns\": [ 135778974958, 168088643086, 145827883201 ],\n \"samples_ts\": [ 0.942709, 0.761503, 0.877747 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-13T01:50:59Z", "avg_ns": 433107466396, "stddev_ns": 1180695900, "avg_ts": 1.183506, "stddev_ts": 0.049465, "samples_ns": [ 413048414362, 439501814943, 446772169884 ], "samples_ts": [ 1.23956, 1.16496, 1.146 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-13T02:20:31Z", "avg_ns": 149898500415, "stddev_ns": 2434813739, "avg_ts": 0.860653, "stddev_ts": 0.091804, "samples_ns": [ 135778974958, 168088643086, 145827883201 ], "samples_ts": [ 0.942709, 0.761503, 0.877747 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 710 }, { "timestamp_utc": "2025-12-13T03:28:43.545260+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "128", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T02:28:04Z\",\n \"avg_ns\": 421892523663,\n \"stddev_ns\": 2366450119,\n \"avg_ts\": 1.225085,\n \"stddev_ts\": 0.142855,\n \"samples_ns\": [ 409072602089, 478135487650, 378469481252 ],\n \"samples_ts\": [ 1.25161, 1.07083, 1.35282 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T02:56:38Z\",\n \"avg_ns\": 641084880914,\n \"stddev_ns\": 1988864801,\n \"avg_ts\": 0.798651,\n \"stddev_ts\": 0.002480,\n \"samples_ns\": [ 641535805419, 638909269377, 642809567948 ],\n \"samples_ts\": [ 0.798085, 0.801366, 0.796503 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-13T02:28:04Z", "avg_ns": 421892523663, "stddev_ns": 2366450119, "avg_ts": 1.225085, "stddev_ts": 0.142855, "samples_ns": [ 409072602089, 478135487650, 378469481252 ], "samples_ts": [ 1.25161, 1.07083, 1.35282 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 128, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-13T02:56:38Z", "avg_ns": 641084880914, "stddev_ns": 1988864801, "avg_ts": 0.798651, "stddev_ts": 0.00248, "samples_ns": [ 641535805419, 638909269377, 642809567948 ], "samples_ts": [ 0.798085, 0.801366, 0.796503 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 128, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 711 }, { "timestamp_utc": "2025-12-13T03:43:41.983607+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T03:28:45Z\",\n \"avg_ns\": 103755198637,\n \"stddev_ns\": 3778839432,\n \"avg_ts\": 1.432540,\n \"stddev_ts\": 0.585072,\n \"samples_ns\": [ 67441184333, 165003087847, 78821323732 ],\n \"samples_ts\": [ 1.89795, 0.775743, 1.62393 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T03:35:43Z\",\n \"avg_ns\": 159254363301,\n \"stddev_ns\": 3937350550,\n \"avg_ts\": 0.813152,\n \"stddev_ts\": 0.110770,\n \"samples_ns\": [ 166961289093, 136229040878, 174572759934 ],\n \"samples_ts\": [ 0.766645, 0.939594, 0.733219 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-13T03:28:45Z", "avg_ns": 103755198637, "stddev_ns": 3778839432, "avg_ts": 1.43254, "stddev_ts": 0.585072, "samples_ns": [ 67441184333, 165003087847, 78821323732 ], "samples_ts": [ 1.89795, 0.775743, 1.62393 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-13T03:35:43Z", "avg_ns": 159254363301, "stddev_ns": 3937350550, "avg_ts": 0.813152, "stddev_ts": 0.11077, "samples_ns": [ 166961289093, 136229040878, 174572759934 ], "samples_ts": [ 0.766645, 0.939594, 0.733219 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 712 }, { "timestamp_utc": "2025-12-13T04:23:59.010109+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T03:43:43Z\",\n \"avg_ns\": 103649470155,\n \"stddev_ns\": 1357914149,\n \"avg_ts\": 1.438463,\n \"stddev_ts\": 0.600816,\n \"samples_ns\": [ 65615729081, 164701347474, 80631333910 ],\n \"samples_ts\": [ 1.95075, 0.777164, 1.58747 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T03:50:42Z\",\n \"avg_ns\": 665175871837,\n \"stddev_ns\": 4074038284,\n \"avg_ts\": 0.771335,\n \"stddev_ts\": 0.043757,\n \"samples_ns\": [ 623868969374, 677210066222, 694448579917 ],\n \"samples_ts\": [ 0.820685, 0.756043, 0.737276 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-13T03:43:43Z", "avg_ns": 103649470155, "stddev_ns": 1357914149, "avg_ts": 1.438463, "stddev_ts": 0.600816, "samples_ns": [ 65615729081, 164701347474, 80631333910 ], "samples_ts": [ 1.95075, 0.777164, 1.58747 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-13T03:50:42Z", "avg_ns": 665175871837, "stddev_ns": 4074038284, "avg_ts": 0.771335, "stddev_ts": 0.043757, "samples_ns": [ 623868969374, 677210066222, 694448579917 ], "samples_ts": [ 0.820685, 0.756043, 0.737276 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 713 }, { "timestamp_utc": "2025-12-13T05:00:33.433170+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T04:24:01Z\",\n \"avg_ns\": 433009788211,\n \"stddev_ns\": 3675488178,\n \"avg_ts\": 1.183747,\n \"stddev_ts\": 0.049021,\n \"samples_ns\": [ 446161700331, 439810211459, 413057452845 ],\n \"samples_ts\": [ 1.14757, 1.16414, 1.23954 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T04:52:27Z\",\n \"avg_ns\": 161094175632,\n \"stddev_ns\": 2170589742,\n \"avg_ts\": 0.813458,\n \"stddev_ts\": 0.159862,\n \"samples_ns\": [ 180900680441, 128340431453, 174041415003 ],\n \"samples_ts\": [ 0.707571, 0.997347, 0.735457 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-13T04:24:01Z", "avg_ns": 433009788211, "stddev_ns": 3675488178, "avg_ts": 1.183747, "stddev_ts": 0.049021, "samples_ns": [ 446161700331, 439810211459, 413057452845 ], "samples_ts": [ 1.14757, 1.16414, 1.23954 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-13T04:52:27Z", "avg_ns": 161094175632, "stddev_ns": 2170589742, "avg_ts": 0.813458, "stddev_ts": 0.159862, "samples_ns": [ 180900680441, 128340431453, 174041415003 ], "samples_ts": [ 0.707571, 0.997347, 0.735457 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 714 }, { "timestamp_utc": "2025-12-13T06:00:29.563376+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "256", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T05:00:34Z\",\n \"avg_ns\": 453441476435,\n \"stddev_ns\": 2002151419,\n \"avg_ts\": 1.140364,\n \"stddev_ts\": 0.142538,\n \"samples_ns\": [ 470901425011, 393293217845, 496129786449 ],\n \"samples_ts\": [ 1.08728, 1.30183, 1.03199 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T05:30:13Z\",\n \"avg_ns\": 605080687028,\n \"stddev_ns\": 298909773,\n \"avg_ts\": 0.846168,\n \"stddev_ts\": 0.000418,\n \"samples_ns\": [ 605365395628, 605107304923, 604769360535 ],\n \"samples_ts\": [ 0.84577, 0.846131, 0.846604 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-13T05:00:34Z", "avg_ns": 453441476435, "stddev_ns": 2002151419, "avg_ts": 1.140364, "stddev_ts": 0.142538, "samples_ns": [ 470901425011, 393293217845, 496129786449 ], "samples_ts": [ 1.08728, 1.30183, 1.03199 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 256, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-13T05:30:13Z", "avg_ns": 605080687028, "stddev_ns": 298909773, "avg_ts": 0.846168, "stddev_ts": 0.000418, "samples_ns": [ 605365395628, 605107304923, 604769360535 ], "samples_ts": [ 0.84577, 0.846131, 0.846604 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 256, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 715 }, { "timestamp_utc": "2025-12-13T06:15:30.836306+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T06:00:30Z\",\n \"avg_ns\": 112827334977,\n \"stddev_ns\": 582596382,\n \"avg_ts\": 1.246088,\n \"stddev_ts\": 0.415146,\n \"samples_ns\": [ 165317924302, 82295430569, 90868650060 ],\n \"samples_ts\": [ 0.774266, 1.55537, 1.40863 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T06:07:13Z\",\n \"avg_ns\": 165207145060,\n \"stddev_ns\": 3880623910,\n \"avg_ts\": 0.810693,\n \"stddev_ts\": 0.224508,\n \"samples_ns\": [ 192612871314, 119715860422, 183292703446 ],\n \"samples_ts\": [ 0.664545, 1.0692, 0.698337 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-13T06:00:30Z", "avg_ns": 112827334977, "stddev_ns": 582596382, "avg_ts": 1.246088, "stddev_ts": 0.415146, "samples_ns": [ 165317924302, 82295430569, 90868650060 ], "samples_ts": [ 0.774266, 1.55537, 1.40863 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-13T06:07:13Z", "avg_ns": 165207145060, "stddev_ns": 3880623910, "avg_ts": 0.810693, "stddev_ts": 0.224508, "samples_ns": [ 192612871314, 119715860422, 183292703446 ], "samples_ts": [ 0.664545, 1.0692, 0.698337 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 716 }, { "timestamp_utc": "2025-12-13T06:55:40.200098+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "128", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T06:15:31Z\",\n \"avg_ns\": 118592120931,\n \"stddev_ns\": 4082249926,\n \"avg_ts\": 1.271682,\n \"stddev_ts\": 0.605264,\n \"samples_ns\": [ 182683119772, 67149703336, 105943539687 ],\n \"samples_ts\": [ 0.700667, 1.90619, 1.20819 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T06:22:34Z\",\n \"avg_ns\": 661106226756,\n \"stddev_ns\": 1262708336,\n \"avg_ts\": 0.775817,\n \"stddev_ts\": 0.040120,\n \"samples_ns\": [ 689888632797, 669198921242, 624231126230 ],\n \"samples_ts\": [ 0.742149, 0.765094, 0.820209 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 128, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-13T06:15:31Z", "avg_ns": 118592120931, "stddev_ns": 4082249926, "avg_ts": 1.271682, "stddev_ts": 0.605264, "samples_ns": [ 182683119772, 67149703336, 105943539687 ], "samples_ts": [ 0.700667, 1.90619, 1.20819 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-13T06:22:34Z", "avg_ns": 661106226756, "stddev_ns": 1262708336, "avg_ts": 0.775817, "stddev_ts": 0.04012, "samples_ns": [ 689888632797, 669198921242, 624231126230 ], "samples_ts": [ 0.742149, 0.765094, 0.820209 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 128, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 717 }, { "timestamp_utc": "2025-12-13T07:32:12.125925+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "128", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T06:55:41Z\",\n \"avg_ns\": 448060419825,\n \"stddev_ns\": 558266600,\n \"avg_ts\": 1.148811,\n \"stddev_ts\": 0.104071,\n \"samples_ns\": [ 455610121388, 405345704636, 483225433452 ],\n \"samples_ts\": [ 1.12377, 1.26312, 1.05955 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T07:25:18Z\",\n \"avg_ns\": 137570068479,\n \"stddev_ns\": 487337883,\n \"avg_ts\": 1.040578,\n \"stddev_ts\": 0.374631,\n \"samples_ns\": [ 96490388209, 207623556537, 108596260692 ],\n \"samples_ts\": [ 1.32656, 0.6165, 1.17868 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-13T06:55:41Z", "avg_ns": 448060419825, "stddev_ns": 558266600, "avg_ts": 1.148811, "stddev_ts": 0.104071, "samples_ns": [ 455610121388, 405345704636, 483225433452 ], "samples_ts": [ 1.12377, 1.26312, 1.05955 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 128, "n_depth": 0, "test_time": "2025-12-13T07:25:18Z", "avg_ns": 137570068479, "stddev_ns": 487337883, "avg_ts": 1.040578, "stddev_ts": 0.374631, "samples_ns": [ 96490388209, 207623556537, 108596260692 ], "samples_ts": [ 1.32656, 0.6165, 1.17868 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 128, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 718 }, { "timestamp_utc": "2025-12-13T08:32:48.786193+00:00", "command": [ "/home/ubuntu/sunkiss/inference/llama.cpp/build/bin/llama-bench", "--model", "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "--threads", "2", "--batch-size", "2048", "--ubatch-size", "512", "--n-prompt", "512", "--n-gen", "512", "--repetitions", "3", "--output", "json" ], "returncode": 0, "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T07:32:14Z\",\n \"avg_ns\": 416491989808,\n \"stddev_ns\": 4067130096,\n \"avg_ts\": 1.256586,\n \"stddev_ts\": 0.215532,\n \"samples_ns\": [ 375994343626, 507603331438, 365878294362 ],\n \"samples_ts\": [ 1.36172, 1.00866, 1.39937 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T08:01:07Z\",\n \"avg_ns\": 633429283343,\n \"stddev_ns\": 994638012,\n \"avg_ts\": 0.808425,\n \"stddev_ts\": 0.012430,\n \"samples_ns\": [ 622300307727, 638417185123, 639570357179 ],\n \"samples_ts\": [ 0.822754, 0.801983, 0.800537 ]\n }\n]\n", "stderr": "", "parsed": [ { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 512, "n_gen": 0, "n_depth": 0, "test_time": "2025-12-13T07:32:14Z", "avg_ns": 416491989808, "stddev_ns": 4067130096, "avg_ts": 1.256586, "stddev_ts": 0.215532, "samples_ns": [ 375994343626, 507603331438, 365878294362 ], "samples_ts": [ 1.36172, 1.00866, 1.39937 ] }, { "build_commit": "2fa51c19b", "build_number": 7326, "cpu_info": "Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", "gpu_info": "", "backends": "CPU", "model_filename": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_type": "gemma3 12B Q2_K - Medium", "model_size": 4761669504, "model_n_params": 11766034176, "n_batch": 2048, "n_ubatch": 512, "n_threads": 2, "cpu_mask": "0x0", "cpu_strict": false, "poll": 50, "type_k": "f16", "type_v": "f16", "n_gpu_layers": 99, "n_cpu_moe": 0, "split_mode": "layer", "main_gpu": 0, "no_kv_offload": false, "flash_attn": false, "devices": "auto", "tensor_split": "0.00", "tensor_buft_overrides": "none", "use_mmap": true, "embeddings": false, "no_op_offload": 0, "no_host": false, "n_prompt": 0, "n_gen": 512, "n_depth": 0, "test_time": "2025-12-13T08:01:07Z", "avg_ns": 633429283343, "stddev_ns": 994638012, "avg_ts": 0.808425, "stddev_ts": 0.01243, "samples_ns": [ 622300307727, 638417185123, 639570357179 ], "samples_ts": [ 0.822754, 0.801983, 0.800537 ] } ], "params": { "model_path": "/home/ubuntu/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", "model_repo": "unsloth/gemma-3-12B-it-GGUF", "quantization": "Q2_K_L", "threads": 2, "batch_size": 2048, "ubatch_size": 512, "n_prompt": 512, "n_gen": 512, "repetitions": 3, "numa": null, "priority": 0, "progress": false }, "run_index": 719 } ], "plots": [ "/home/ubuntu/sunkiss/inference/result/throughput_vs_threads.png", "/home/ubuntu/sunkiss/inference/result/throughput_vs_batch.png", "/home/ubuntu/sunkiss/inference/result/latency_vs_threads.png" ] }