{ "baseline_fp32": { "scenario": "baseline_fp32", "load_s": 6.4945597410041955, "tokenize_s": 0.0005106902499392163, "prefill_forward_s": 0.19786394450056832, "first_token_latency_s": 0.1586934420010948, "generate_s": 6.546512500754034, "decode_tokens_per_s": 9.776255744739366, "token_match_vs_baseline": 1.0, "rss_before_load_gb": 0.4108467102050781, "rss_after_load_gb": 2.280529022216797, "rss_after_bench_gb": 2.3896942138671875 }, "rotorquant_pkg": { "scenario": "rotorquant_pkg", "load_s": 6.5327220170001965, "tokenize_s": 0.0006985757481743349, "prefill_forward_s": 0.18398665499989875, "first_token_latency_s": 0.15238651200161257, "generate_s": 6.707383360248059, "decode_tokens_per_s": 9.551668113447072, "token_match_vs_baseline": 0.08203125, "rss_before_load_gb": 0.7133522033691406, "rss_after_load_gb": 2.70635986328125, "rss_after_bench_gb": 2.70635986328125, "delta_vs_baseline": { "load_s": 0.03816227599600097, "prefill_forward_s": -0.013877289500669576, "first_token_latency_s": -0.006306929999482236, "generate_s": 0.16087085949402535, "decode_tokens_per_s": -0.22458763129229453, "rss_after_load_gb": 0.4258308410644531 } }, "rotorquant_fused_runtime": { "scenario": "rotorquant_fused_runtime", "load_s": 3.895400952002092, "tokenize_s": 0.0004967142504028743, "prefill_forward_s": 0.5016050927497417, "first_token_latency_s": 0.1620842227475805, "generate_s": 6.19019198299975, "decode_tokens_per_s": 10.339034218331195, "token_match_vs_baseline": 0.00390625, "rss_before_load_gb": 1.5660324096679688, "rss_after_load_gb": 2.888378143310547, "rss_after_bench_gb": 3.717845916748047, "delta_vs_baseline": { "load_s": -2.5991587890021037, "prefill_forward_s": 0.3037411482491734, "first_token_latency_s": 0.0033907807464856887, "generate_s": -0.3563205177542841, "decode_tokens_per_s": 0.5627784735918286, "rss_after_load_gb": 0.60784912109375 } }, "runtime_dynamic_int8": { "scenario": "runtime_dynamic_int8", "load_s": 4.646019098996476, "tokenize_s": 0.0004673119983635843, "prefill_forward_s": 0.1323147195016645, "first_token_latency_s": 0.09710656300012488, "generate_s": 4.059147370002393, "decode_tokens_per_s": 15.767303236288763, "token_match_vs_baseline": 0.015625, "rss_before_load_gb": 1.7176094055175781, "rss_after_load_gb": 2.645465850830078, "rss_after_bench_gb": 2.6685028076171875, "delta_vs_baseline": { "load_s": -1.8485406420077197, "prefill_forward_s": -0.06554922499890381, "first_token_latency_s": -0.06158687900096993, "generate_s": -2.487365130751641, "decode_tokens_per_s": 5.991047491549397, "rss_after_load_gb": 0.36493682861328125 } } }