{ "base_model": "AIDC-AI/Ovis2-8B", "method": "layer_pruning_ppl", "method_name": "PPL-based (Shortened LLaMA)", "layers_removed": [ 4, 5, 7, 8, 10, 11, 13, 15 ], "n_layers_original": 28, "n_layers_remaining": 20, "params_before_M": 8935.3, "params_after_M": 7070.8, "param_reduction_pct": 20.9, "benchmarks": { "vqav2": { "accuracy": 0.0, "avg_latency_s": 0.5913, "peak_memory_mb": 18675.1, "avg_memory_mb": 18638.7, "throughput_sps": 1.69, "avg_power_w": 289.8, "avg_gpu_util_pct": 94.2, "n_samples": 50, "n_evaluated": 50, "n_skipped": 0, "all_failed": false, "zero_accuracy_warning": true, "metrics": { "exact_match": 0.0, "contains": 0.26, "token_f1": 0.0731, "bleu": 0.0457, "rouge_l": 0.0731 } } } }