{ "base_model": "AIDC-AI/Ovis2-4B", "method": "layer_pruning_ppl", "method_name": "PPL-based (Shortened LLaMA)", "layers_removed": [ 3, 4, 5, 6, 7, 8, 10, 11, 18, 19 ], "n_layers_original": 36, "n_layers_remaining": 26, "params_before_M": 4304.9, "params_after_M": 3534.2, "param_reduction_pct": 17.9, "benchmarks": { "vqav2": { "accuracy": 0.06, "avg_latency_s": 0.5029, "peak_memory_mb": 15235.1, "avg_memory_mb": 15230.8, "throughput_sps": 1.987, "avg_power_w": 236.7, "avg_gpu_util_pct": 69.1, "n_samples": 50, "n_evaluated": 50, "n_skipped": 0, "all_failed": false, "zero_accuracy_warning": false, "metrics": { "exact_match": 0.06, "contains": 0.42, "token_f1": 0.1762, "bleu": 0.135, "rouge_l": 0.1762 } } } }