RotorQuant-ModelWeights-Runtime / artifacts /runtime_benchmark_mlp_int8.json
cnmoro's picture
Upload 29 files
18f4d80 verified
{
"baseline_fp32": {
"scenario": "baseline_fp32",
"load_s": 6.543875241004571,
"tokenize_s": 0.0005066652502137003,
"prefill_forward_s": 0.20371673600129725,
"first_token_latency_s": 0.15924113799883344,
"generate_s": 6.533192098997461,
"decode_tokens_per_s": 9.796191854460172,
"token_match_vs_baseline": 1.0,
"rss_before_load_gb": 0.41039276123046875,
"rss_after_load_gb": 2.2797927856445312,
"rss_after_bench_gb": 2.3897438049316406
},
"rotorquant_pkg": {
"scenario": "rotorquant_pkg",
"load_s": 6.701765562000219,
"tokenize_s": 0.0004997224987164373,
"prefill_forward_s": 0.194320453751061,
"first_token_latency_s": 0.15327360125047562,
"generate_s": 6.670602966249135,
"decode_tokens_per_s": 9.597136662718697,
"token_match_vs_baseline": 0.08203125,
"rss_before_load_gb": 0.7134361267089844,
"rss_after_load_gb": 2.7142677307128906,
"rss_after_bench_gb": 2.7142677307128906,
"delta_vs_baseline": {
"load_s": 0.15789032099564793,
"prefill_forward_s": -0.009396282250236254,
"first_token_latency_s": -0.005967536748357816,
"generate_s": 0.13741086725167406,
"decode_tokens_per_s": -0.19905519174147557,
"rss_after_load_gb": 0.4344749450683594
}
},
"runtime_dynamic_int8": {
"scenario": "runtime_dynamic_int8",
"load_s": 4.91570828499971,
"tokenize_s": 0.0004622100004780805,
"prefill_forward_s": 0.13502329399852897,
"first_token_latency_s": 0.09539279499949771,
"generate_s": 3.9486844472521625,
"decode_tokens_per_s": 16.208229137967894,
"token_match_vs_baseline": 0.015625,
"rss_before_load_gb": 1.4759712219238281,
"rss_after_load_gb": 2.553119659423828,
"rss_after_bench_gb": 2.5759239196777344,
"delta_vs_baseline": {
"load_s": -1.6281669560048613,
"prefill_forward_s": -0.06869344200276828,
"first_token_latency_s": -0.06384834299933573,
"generate_s": -2.584507651745298,
"decode_tokens_per_s": 6.412037283507722,
"rss_after_load_gb": 0.2733268737792969
}
}
}