| { |
| "model_id": "Qwen/Qwen2.5-0.5B-Instruct", |
| "dtype": "float32", |
| "max_new_tokens": 64, |
| "scenarios": { |
| "baseline_fp": { |
| "scenario": "baseline_fp", |
| "kind": "baseline", |
| "load_s": 6.473804672998085, |
| "tokenize_s": 0.0005482277483679354, |
| "prefill_forward_s": 0.201041800000894, |
| "first_token_latency_s": 0.15509662850126915, |
| "generate_s": 6.624326677499994, |
| "decode_tokens_per_s": 9.662278657664311, |
| "token_match_vs_baseline": 1.0, |
| "rss_before_load_gb": 0.4110565185546875, |
| "rss_after_load_gb": 2.2805099487304688, |
| "rss_after_bench_gb": 2.3700904846191406, |
| "meta": null, |
| "delta_vs_baseline": { |
| "load_s": 0.0, |
| "tokenize_s": 0.0, |
| "prefill_forward_s": 0.0, |
| "first_token_latency_s": 0.0, |
| "generate_s": 0.0, |
| "decode_tokens_per_s": 0.0, |
| "rss_after_load_gb": 0.0, |
| "rss_after_bench_gb": 0.0 |
| } |
| }, |
| "qwen2.5-0.5b-3bit-norotor-skipemb": { |
| "scenario": "qwen2.5-0.5b-3bit-norotor-skipemb", |
| "kind": "quantized", |
| "load_s": 5.841964585000824, |
| "tokenize_s": 0.0004919829989376012, |
| "prefill_forward_s": 0.1902955715013377, |
| "first_token_latency_s": 0.1538466910005809, |
| "generate_s": 6.760918115751338, |
| "decode_tokens_per_s": 9.467052691535141, |
| "token_match_vs_baseline": 0.015625, |
| "rss_before_load_gb": 0.6937255859375, |
| "rss_after_load_gb": 3.087230682373047, |
| "rss_after_bench_gb": 3.1112403869628906, |
| "meta": { |
| "path": "artifacts/qwen2.5-0.5b-3bit-norotor-skipemb.pt", |
| "bits": 3, |
| "block_size": 128, |
| "lowrank_rank": 0, |
| "rotor_angle_scale": 0.0, |
| "rowwise": false, |
| "num_quantized": 168, |
| "num_passthrough": 123 |
| }, |
| "delta_vs_baseline": { |
| "load_s": -0.6318400879972614, |
| "tokenize_s": -5.6244749430334195e-05, |
| "prefill_forward_s": -0.01074622849955631, |
| "first_token_latency_s": -0.0012499375006882474, |
| "generate_s": 0.1365914382513438, |
| "decode_tokens_per_s": -0.1952259661291702, |
| "rss_after_load_gb": 0.8067207336425781, |
| "rss_after_bench_gb": 0.74114990234375 |
| } |
| }, |
| "qwen2.5-0.5b-rotorq3-centers-skipemb": { |
| "scenario": "qwen2.5-0.5b-rotorq3-centers-skipemb", |
| "kind": "quantized", |
| "load_s": 6.22768382600043, |
| "tokenize_s": 0.000522547246873728, |
| "prefill_forward_s": 0.19800069149823685, |
| "first_token_latency_s": 0.16200276899871824, |
| "generate_s": 6.969514832997447, |
| "decode_tokens_per_s": 9.195669356282204, |
| "token_match_vs_baseline": 0.01953125, |
| "rss_before_load_gb": 2.2186622619628906, |
| "rss_after_load_gb": 3.4447555541992188, |
| "rss_after_bench_gb": 3.4447555541992188, |
| "meta": { |
| "path": "artifacts/qwen2.5-0.5b-rotorq3-centers-skipemb.pt", |
| "bits": 3, |
| "block_size": 128, |
| "lowrank_rank": 0, |
| "rotor_angle_scale": 1.0, |
| "rowwise": false, |
| "num_quantized": 168, |
| "num_passthrough": 123 |
| }, |
| "delta_vs_baseline": { |
| "load_s": -0.24612084699765546, |
| "tokenize_s": -2.568050149420742e-05, |
| "prefill_forward_s": -0.0030411085026571527, |
| "first_token_latency_s": 0.006906140497449087, |
| "generate_s": 0.3451881554974534, |
| "decode_tokens_per_s": -0.4666093013821069, |
| "rss_after_load_gb": 1.16424560546875, |
| "rss_after_bench_gb": 1.0746650695800781 |
| } |
| }, |
| "qwen2.5-0.5b-rotorq3-flat-skipemb-r8": { |
| "scenario": "qwen2.5-0.5b-rotorq3-flat-skipemb-r8", |
| "kind": "quantized", |
| "load_s": 6.1000825989976875, |
| "tokenize_s": 0.0005129745004524011, |
| "prefill_forward_s": 0.19054085675088572, |
| "first_token_latency_s": 0.15325999474953278, |
| "generate_s": 6.9399479319999955, |
| "decode_tokens_per_s": 9.237942900430912, |
| "token_match_vs_baseline": 0.00390625, |
| "rss_before_load_gb": 2.5487518310546875, |
| "rss_after_load_gb": 3.445331573486328, |
| "rss_after_bench_gb": 3.445331573486328, |
| "meta": { |
| "path": "artifacts/qwen2.5-0.5b-rotorq3-flat-skipemb-r8.pt", |
| "bits": 3, |
| "block_size": 128, |
| "lowrank_rank": 8, |
| "rotor_angle_scale": 1.0, |
| "rowwise": false, |
| "num_quantized": 168, |
| "num_passthrough": 123 |
| }, |
| "delta_vs_baseline": { |
| "load_s": -0.3737220740003977, |
| "tokenize_s": -3.52532479155343e-05, |
| "prefill_forward_s": -0.010500943250008277, |
| "first_token_latency_s": -0.0018366337517363718, |
| "generate_s": 0.31562125450000167, |
| "decode_tokens_per_s": -0.4243357572333988, |
| "rss_after_load_gb": 1.1648216247558594, |
| "rss_after_bench_gb": 1.0752410888671875 |
| } |
| }, |
| "qwen2.5-0.5b-rotorq3-mlp-only": { |
| "scenario": "qwen2.5-0.5b-rotorq3-mlp-only", |
| "kind": "quantized", |
| "load_s": 6.21272536500328, |
| "tokenize_s": 0.0005316745027812431, |
| "prefill_forward_s": 0.1856292709999252, |
| "first_token_latency_s": 0.15140363250066002, |
| "generate_s": 6.670889891249317, |
| "decode_tokens_per_s": 9.598262406082874, |
| "token_match_vs_baseline": 0.08203125, |
| "rss_before_load_gb": 2.576690673828125, |
| "rss_after_load_gb": 3.74371337890625, |
| "rss_after_bench_gb": 3.7438125610351562, |
| "meta": { |
| "path": "artifacts/qwen2.5-0.5b-rotorq3-mlp-only.pt", |
| "bits": 3, |
| "block_size": 128, |
| "lowrank_rank": 0, |
| "rotor_angle_scale": 1.0, |
| "rowwise": false, |
| "num_quantized": 72, |
| "num_passthrough": 219 |
| }, |
| "delta_vs_baseline": { |
| "load_s": -0.2610793079948053, |
| "tokenize_s": -1.6553245586692356e-05, |
| "prefill_forward_s": -0.015412529000968789, |
| "first_token_latency_s": -0.003692996000609128, |
| "generate_s": 0.04656321374932304, |
| "decode_tokens_per_s": -0.06401625158143709, |
| "rss_after_load_gb": 1.4632034301757812, |
| "rss_after_bench_gb": 1.3737220764160156 |
| } |
| }, |
| "qwen2.5-0.5b-rotorq3-rowwise-skipemb": { |
| "scenario": "qwen2.5-0.5b-rotorq3-rowwise-skipemb", |
| "kind": "quantized", |
| "load_s": 5.950046132005809, |
| "tokenize_s": 0.0005396470023697475, |
| "prefill_forward_s": 0.1872971802495158, |
| "first_token_latency_s": 0.15000393424816139, |
| "generate_s": 6.734860859251057, |
| "decode_tokens_per_s": 9.505182046546704, |
| "token_match_vs_baseline": 0.0234375, |
| "rss_before_load_gb": 2.6743202209472656, |
| "rss_after_load_gb": 3.7621498107910156, |
| "rss_after_bench_gb": 3.7620086669921875, |
| "meta": { |
| "path": "artifacts/qwen2.5-0.5b-rotorq3-rowwise-skipemb.pt", |
| "bits": 3, |
| "block_size": 128, |
| "lowrank_rank": 0, |
| "rotor_angle_scale": 1.0, |
| "rowwise": false, |
| "num_quantized": 168, |
| "num_passthrough": 123 |
| }, |
| "delta_vs_baseline": { |
| "load_s": -0.5237585409922758, |
| "tokenize_s": -8.580745998187922e-06, |
| "prefill_forward_s": -0.013744619751378195, |
| "first_token_latency_s": -0.005092694253107766, |
| "generate_s": 0.11053418175106344, |
| "decode_tokens_per_s": -0.1570966111176073, |
| "rss_after_load_gb": 1.4816398620605469, |
| "rss_after_bench_gb": 1.3919181823730469 |
| } |
| }, |
| "qwen2.5-0.5b-rotorq3": { |
| "scenario": "qwen2.5-0.5b-rotorq3", |
| "kind": "quantized", |
| "load_s": 8.498550903997966, |
| "tokenize_s": 0.0005228027494013077, |
| "prefill_forward_s": 0.19032409149986051, |
| "first_token_latency_s": 0.1559712282505643, |
| "generate_s": 7.009008278000692, |
| "decode_tokens_per_s": 9.14007514579254, |
| "token_match_vs_baseline": 0.015625, |
| "rss_before_load_gb": 2.47454833984375, |
| "rss_after_load_gb": 3.1861610412597656, |
| "rss_after_bench_gb": 3.18621826171875, |
| "meta": { |
| "path": "artifacts/qwen2.5-0.5b-rotorq3.pt", |
| "bits": 3, |
| "block_size": 128, |
| "lowrank_rank": 0, |
| "rotor_angle_scale": 1.0, |
| "rowwise": false, |
| "num_quantized": 170, |
| "num_passthrough": 121 |
| }, |
| "delta_vs_baseline": { |
| "load_s": 2.024746230999881, |
| "tokenize_s": -2.5424998966627754e-05, |
| "prefill_forward_s": -0.010717708501033485, |
| "first_token_latency_s": 0.0008745997492951574, |
| "generate_s": 0.38468160050069855, |
| "decode_tokens_per_s": -0.5222035118717709, |
| "rss_after_load_gb": 0.9056510925292969, |
| "rss_after_bench_gb": 0.8161277770996094 |
| } |
| } |
| } |
| } |