{ "baseline_fp32": { "scenario": "baseline_fp32", "load_s": 6.543875241004571, "tokenize_s": 0.0005066652502137003, "prefill_forward_s": 0.20371673600129725, "first_token_latency_s": 0.15924113799883344, "generate_s": 6.533192098997461, "decode_tokens_per_s": 9.796191854460172, "token_match_vs_baseline": 1.0, "rss_before_load_gb": 0.41039276123046875, "rss_after_load_gb": 2.2797927856445312, "rss_after_bench_gb": 2.3897438049316406 }, "rotorquant_pkg": { "scenario": "rotorquant_pkg", "load_s": 6.701765562000219, "tokenize_s": 0.0004997224987164373, "prefill_forward_s": 0.194320453751061, "first_token_latency_s": 0.15327360125047562, "generate_s": 6.670602966249135, "decode_tokens_per_s": 9.597136662718697, "token_match_vs_baseline": 0.08203125, "rss_before_load_gb": 0.7134361267089844, "rss_after_load_gb": 2.7142677307128906, "rss_after_bench_gb": 2.7142677307128906, "delta_vs_baseline": { "load_s": 0.15789032099564793, "prefill_forward_s": -0.009396282250236254, "first_token_latency_s": -0.005967536748357816, "generate_s": 0.13741086725167406, "decode_tokens_per_s": -0.19905519174147557, "rss_after_load_gb": 0.4344749450683594 } }, "runtime_dynamic_int8": { "scenario": "runtime_dynamic_int8", "load_s": 4.91570828499971, "tokenize_s": 0.0004622100004780805, "prefill_forward_s": 0.13502329399852897, "first_token_latency_s": 0.09539279499949771, "generate_s": 3.9486844472521625, "decode_tokens_per_s": 16.208229137967894, "token_match_vs_baseline": 0.015625, "rss_before_load_gb": 1.4759712219238281, "rss_after_load_gb": 2.553119659423828, "rss_after_bench_gb": 2.5759239196777344, "delta_vs_baseline": { "load_s": -1.6281669560048613, "prefill_forward_s": -0.06869344200276828, "first_token_latency_s": -0.06384834299933573, "generate_s": -2.584507651745298, "decode_tokens_per_s": 6.412037283507722, "rss_after_load_gb": 0.2733268737792969 } } }