| { | |
| "base_model": "AIDC-AI/Ovis2-4B", | |
| "method": "layer_pruning_ppl", | |
| "method_name": "PPL-based (Shortened LLaMA)", | |
| "layers_removed": [ | |
| 3, | |
| 4, | |
| 5, | |
| 6, | |
| 7, | |
| 8, | |
| 10, | |
| 11, | |
| 18, | |
| 19 | |
| ], | |
| "n_layers_original": 36, | |
| "n_layers_remaining": 26, | |
| "params_before_M": 4304.9, | |
| "params_after_M": 3534.2, | |
| "param_reduction_pct": 17.9, | |
| "benchmarks": { | |
| "vqav2": { | |
| "accuracy": 0.06, | |
| "avg_latency_s": 0.5029, | |
| "peak_memory_mb": 15235.1, | |
| "avg_memory_mb": 15230.8, | |
| "throughput_sps": 1.987, | |
| "avg_power_w": 236.7, | |
| "avg_gpu_util_pct": 69.1, | |
| "n_samples": 50, | |
| "n_evaluated": 50, | |
| "n_skipped": 0, | |
| "all_failed": false, | |
| "zero_accuracy_warning": false, | |
| "metrics": { | |
| "exact_match": 0.06, | |
| "contains": 0.42, | |
| "token_f1": 0.1762, | |
| "bleu": 0.135, | |
| "rouge_l": 0.1762 | |
| } | |
| } | |
| } | |
| } |