FORGE-Nano-Benchmark / benchmarks /bench_04_pruning.json
ilessio-aiflowlab's picture
Upload benchmarks/bench_04_pruning.json with huggingface_hub
61e1a5c verified
{
"benchmark": "pruning",
"timestamp": "2026-03-19T11:56:26.754002+00:00",
"device": "NVIDIA L4",
"model": "forge-nano",
"original_layers": 27,
"original_params_m": 967.9,
"n_calibration_samples": 5,
"importance_scoring": {
"alpha_0.4": {
"alpha": 0.4,
"compute_time_s": 18.5,
"scores": {
"0": 0.6821,
"1": 0.589083,
"2": 0.484819,
"3": 0.498897,
"4": 0.357,
"5": 0.312231,
"6": 0.419511,
"7": 0.3,
"8": 0.387732,
"9": 0.34533,
"10": 0.585286,
"11": 0.329692,
"12": 0.578829,
"13": 0.625759,
"14": 0.347474,
"15": 0.330667,
"16": 0.557559,
"17": 0.502955,
"18": 0.341126,
"19": 0.381665,
"20": 0.555531,
"21": 0.434183,
"22": 0.465446,
"23": 0.478263,
"24": 0.7,
"25": 0.627597,
"26": 0.580097
},
"top_5": [
24,
0,
25,
13,
1
],
"bottom_5": [
7,
5,
11,
15,
18
]
},
"alpha_0.6": {
"alpha": 0.6,
"compute_time_s": 18.24,
"scores": {
"0": 0.8,
"1": 0.593113,
"2": 0.272039,
"3": 0.330657,
"4": 0.260876,
"5": 0.2824,
"6": 0.26133,
"7": 0.363075,
"8": 0.303696,
"9": 0.289945,
"10": 0.2,
"11": 0.258675,
"12": 0.303762,
"13": 0.418191,
"14": 0.307108,
"15": 0.284902,
"16": 0.367257,
"17": 0.353113,
"18": 0.504782,
"19": 0.385914,
"20": 0.381048,
"21": 0.320433,
"22": 0.451704,
"23": 0.441047,
"24": 0.336173,
"25": 0.774385,
"26": 0.53751
},
"top_5": [
0,
25,
1,
26,
18
],
"bottom_5": [
10,
11,
4,
6,
2
]
},
"alpha_0.8": {
"alpha": 0.8,
"compute_time_s": 18.08,
"scores": {
"0": 0.798272,
"1": 0.512303,
"2": 0.363338,
"3": 0.278091,
"4": 0.2301,
"5": 0.201619,
"6": 0.352617,
"7": 0.503736,
"8": 0.439665,
"9": 0.222165,
"10": 0.494584,
"11": 0.280753,
"12": 0.317803,
"13": 0.565036,
"14": 0.323308,
"15": 0.346798,
"16": 0.50657,
"17": 0.52379,
"18": 0.424586,
"19": 0.477689,
"20": 0.49795,
"21": 0.544455,
"22": 0.514057,
"23": 0.1,
"24": 0.398986,
"25": 0.9,
"26": 0.736681
},
"top_5": [
25,
0,
26,
13,
21
],
"bottom_5": [
23,
5,
9,
4,
3
]
},
"alpha_1.0": {
"alpha": 1.0,
"compute_time_s": 17.96,
"scores": {
"0": 0.829495,
"1": 0.767334,
"2": 0.281207,
"3": 0.195048,
"4": 0.19936,
"5": 0.233463,
"6": 0.427037,
"7": 0.190362,
"8": 0.27661,
"9": 0.410202,
"10": 0.252309,
"11": 0.219139,
"12": 0.424771,
"13": 1.0,
"14": 0.0,
"15": 0.120373,
"16": 0.330967,
"17": 0.492051,
"18": 0.247872,
"19": 0.298858,
"20": 0.122975,
"21": 0.533206,
"22": 0.294316,
"23": 0.475619,
"24": 0.175312,
"25": 0.760964,
"26": 0.452499
},
"top_5": [
13,
0,
1,
25,
21
],
"bottom_5": [
14,
15,
20,
24,
7
]
}
},
"pruning_results": {
"keep_90pct": {
"keep_ratio": 0.9,
"layers_before": 27,
"layers_after": 24,
"layers_removed": [
15,
22,
23
],
"params_before": 967930567,
"params_before_m": 967.9,
"params_after": 922212055,
"params_after_m": 922.2,
"params_retained_pct": 95.3,
"prune_time_s": 0.88,
"latency_p50_ms": 120.26,
"latency_p95_ms": 124.98,
"latency_mean_ms": 120.64,
"fps": 8.3,
"gpu_mem_gb": 7.81,
"output_shape": [
1,
7
]
},
"keep_75pct": {
"keep_ratio": 0.75,
"layers_before": 27,
"layers_after": 20,
"layers_removed": [
8,
9,
10,
12,
15,
22,
23
],
"params_before": 967930567,
"params_before_m": 967.9,
"params_after": 861254039,
"params_after_m": 861.3,
"params_retained_pct": 89.0,
"prune_time_s": 0.83,
"latency_p50_ms": 104.33,
"latency_p95_ms": 106.68,
"latency_mean_ms": 104.44,
"fps": 9.6,
"gpu_mem_gb": 9.46,
"output_shape": [
1,
7
]
},
"keep_60pct": {
"keep_ratio": 0.6,
"layers_before": 27,
"layers_after": 16,
"layers_removed": [
4,
6,
7,
8,
9,
10,
12,
15,
22,
23,
24
],
"params_before": 967930567,
"params_before_m": 967.9,
"params_after": 800296023,
"params_after_m": 800.3,
"params_retained_pct": 82.7,
"prune_time_s": 0.84,
"latency_p50_ms": 90.25,
"latency_p95_ms": 91.62,
"latency_mean_ms": 90.4,
"fps": 11.1,
"gpu_mem_gb": 11.1,
"output_shape": [
1,
7
]
},
"keep_50pct": {
"keep_ratio": 0.5,
"layers_before": 27,
"layers_after": 13,
"layers_removed": [
2,
4,
6,
7,
8,
9,
10,
11,
12,
15,
17,
22,
23,
24
],
"params_before": 967930567,
"params_before_m": 967.9,
"params_after": 754577511,
"params_after_m": 754.6,
"params_retained_pct": 78.0,
"prune_time_s": 0.84,
"latency_p50_ms": 79.92,
"latency_p95_ms": 81.3,
"latency_mean_ms": 79.96,
"fps": 12.5,
"gpu_mem_gb": 12.75,
"output_shape": [
1,
7
]
}
}
}