| { |
| "baseline_fp32": { |
| "scenario": "baseline_fp32", |
| "load_s": 6.4945597410041955, |
| "tokenize_s": 0.0005106902499392163, |
| "prefill_forward_s": 0.19786394450056832, |
| "first_token_latency_s": 0.1586934420010948, |
| "generate_s": 6.546512500754034, |
| "decode_tokens_per_s": 9.776255744739366, |
| "token_match_vs_baseline": 1.0, |
| "rss_before_load_gb": 0.4108467102050781, |
| "rss_after_load_gb": 2.280529022216797, |
| "rss_after_bench_gb": 2.3896942138671875 |
| }, |
| "rotorquant_pkg": { |
| "scenario": "rotorquant_pkg", |
| "load_s": 6.5327220170001965, |
| "tokenize_s": 0.0006985757481743349, |
| "prefill_forward_s": 0.18398665499989875, |
| "first_token_latency_s": 0.15238651200161257, |
| "generate_s": 6.707383360248059, |
| "decode_tokens_per_s": 9.551668113447072, |
| "token_match_vs_baseline": 0.08203125, |
| "rss_before_load_gb": 0.7133522033691406, |
| "rss_after_load_gb": 2.70635986328125, |
| "rss_after_bench_gb": 2.70635986328125, |
| "delta_vs_baseline": { |
| "load_s": 0.03816227599600097, |
| "prefill_forward_s": -0.013877289500669576, |
| "first_token_latency_s": -0.006306929999482236, |
| "generate_s": 0.16087085949402535, |
| "decode_tokens_per_s": -0.22458763129229453, |
| "rss_after_load_gb": 0.4258308410644531 |
| } |
| }, |
| "rotorquant_fused_runtime": { |
| "scenario": "rotorquant_fused_runtime", |
| "load_s": 3.895400952002092, |
| "tokenize_s": 0.0004967142504028743, |
| "prefill_forward_s": 0.5016050927497417, |
| "first_token_latency_s": 0.1620842227475805, |
| "generate_s": 6.19019198299975, |
| "decode_tokens_per_s": 10.339034218331195, |
| "token_match_vs_baseline": 0.00390625, |
| "rss_before_load_gb": 1.5660324096679688, |
| "rss_after_load_gb": 2.888378143310547, |
| "rss_after_bench_gb": 3.717845916748047, |
| "delta_vs_baseline": { |
| "load_s": -2.5991587890021037, |
| "prefill_forward_s": 0.3037411482491734, |
| "first_token_latency_s": 0.0033907807464856887, |
| "generate_s": -0.3563205177542841, |
| "decode_tokens_per_s": 0.5627784735918286, |
| "rss_after_load_gb": 0.60784912109375 |
| } |
| }, |
| "runtime_dynamic_int8": { |
| "scenario": "runtime_dynamic_int8", |
| "load_s": 4.646019098996476, |
| "tokenize_s": 0.0004673119983635843, |
| "prefill_forward_s": 0.1323147195016645, |
| "first_token_latency_s": 0.09710656300012488, |
| "generate_s": 4.059147370002393, |
| "decode_tokens_per_s": 15.767303236288763, |
| "token_match_vs_baseline": 0.015625, |
| "rss_before_load_gb": 1.7176094055175781, |
| "rss_after_load_gb": 2.645465850830078, |
| "rss_after_bench_gb": 2.6685028076171875, |
| "delta_vs_baseline": { |
| "load_s": -1.8485406420077197, |
| "prefill_forward_s": -0.06554922499890381, |
| "first_token_latency_s": -0.06158687900096993, |
| "generate_s": -2.487365130751641, |
| "decode_tokens_per_s": 5.991047491549397, |
| "rss_after_load_gb": 0.36493682861328125 |
| } |
| } |
| } |