| { | |
| "base_model": "LiquidAI/LFM2-VL-3B", | |
| "method": "layer_pruning_ppl", | |
| "method_name": "PPL-based (Shortened LLaMA)", | |
| "layers_removed": [ | |
| 3, | |
| 6, | |
| 7, | |
| 10, | |
| 11, | |
| 14, | |
| 15, | |
| 18, | |
| 25 | |
| ], | |
| "n_layers_original": 30, | |
| "n_layers_remaining": 21, | |
| "params_before_M": 2999.0, | |
| "params_after_M": 2253.3, | |
| "param_reduction_pct": 24.9, | |
| "benchmarks": { | |
| "vqav2": { | |
| "accuracy": 0.0, | |
| "avg_latency_s": 0.0, | |
| "peak_memory_mb": 7981.1, | |
| "avg_memory_mb": 7981.0, | |
| "throughput_sps": 0.0, | |
| "avg_power_w": 235.4, | |
| "avg_gpu_util_pct": 57.2, | |
| "n_samples": 50, | |
| "n_evaluated": 0, | |
| "n_skipped": 50, | |
| "all_failed": true, | |
| "zero_accuracy_warning": false | |
| } | |
| } | |
| } |