| { | |
| "base_model": "AIDC-AI/Ovis2-8B", | |
| "method": "layer_pruning_ppl", | |
| "method_name": "PPL-based (Shortened LLaMA)", | |
| "layers_removed": [ | |
| 4, | |
| 5, | |
| 7, | |
| 8, | |
| 10, | |
| 11, | |
| 13, | |
| 15 | |
| ], | |
| "n_layers_original": 28, | |
| "n_layers_remaining": 20, | |
| "params_before_M": 8935.3, | |
| "params_after_M": 7070.8, | |
| "param_reduction_pct": 20.9, | |
| "benchmarks": { | |
| "vqav2": { | |
| "accuracy": 0.0, | |
| "avg_latency_s": 0.5913, | |
| "peak_memory_mb": 18675.1, | |
| "avg_memory_mb": 18638.7, | |
| "throughput_sps": 1.69, | |
| "avg_power_w": 289.8, | |
| "avg_gpu_util_pct": 94.2, | |
| "n_samples": 50, | |
| "n_evaluated": 50, | |
| "n_skipped": 0, | |
| "all_failed": false, | |
| "zero_accuracy_warning": true, | |
| "metrics": { | |
| "exact_match": 0.0, | |
| "contains": 0.26, | |
| "token_f1": 0.0731, | |
| "bleu": 0.0457, | |
| "rouge_l": 0.0731 | |
| } | |
| } | |
| } | |
| } |