Layer-pruned Ovis2-4B (PPL-based (Shortened LLaMA), removed 10 layers, 17.9% reduction)
Browse files- config.json +1 -1
- model.safetensors +2 -2
- pruning_info.json +17 -25
config.json
CHANGED
|
@@ -111,7 +111,7 @@
|
|
| 111 |
},
|
| 112 |
"model_type": "ovis",
|
| 113 |
"multimodal_max_length": 32768,
|
| 114 |
-
"num_hidden_layers":
|
| 115 |
"transformers_version": "5.3.0",
|
| 116 |
"visual_tokenizer_config": {
|
| 117 |
"_attn_implementation_autoset": true,
|
|
|
|
| 111 |
},
|
| 112 |
"model_type": "ovis",
|
| 113 |
"multimodal_max_length": 32768,
|
| 114 |
+
"num_hidden_layers": 26,
|
| 115 |
"transformers_version": "5.3.0",
|
| 116 |
"visual_tokenizer_config": {
|
| 117 |
"_attn_implementation_autoset": true,
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f3a5829c5ba683148706c7226f8dd1b8abfb132bc572cf436f77430e4843d5bb
|
| 3 |
+
size 7690733916
|
pruning_info.json
CHANGED
|
@@ -9,44 +9,36 @@
|
|
| 9 |
6,
|
| 10 |
7,
|
| 11 |
8,
|
| 12 |
-
9,
|
| 13 |
10,
|
| 14 |
11,
|
| 15 |
-
12,
|
| 16 |
-
13,
|
| 17 |
-
14,
|
| 18 |
-
15,
|
| 19 |
-
16,
|
| 20 |
18,
|
| 21 |
-
19
|
| 22 |
-
20,
|
| 23 |
-
22
|
| 24 |
],
|
| 25 |
"n_layers_original": 36,
|
| 26 |
-
"n_layers_remaining":
|
| 27 |
"params_before_M": 4304.9,
|
| 28 |
-
"params_after_M":
|
| 29 |
-
"param_reduction_pct":
|
| 30 |
"benchmarks": {
|
| 31 |
"vqav2": {
|
| 32 |
-
"accuracy": 0.
|
| 33 |
-
"avg_latency_s": 0.
|
| 34 |
-
"peak_memory_mb":
|
| 35 |
-
"avg_memory_mb":
|
| 36 |
-
"throughput_sps": 1.
|
| 37 |
-
"avg_power_w":
|
| 38 |
-
"avg_gpu_util_pct":
|
| 39 |
"n_samples": 50,
|
| 40 |
"n_evaluated": 50,
|
| 41 |
"n_skipped": 0,
|
| 42 |
"all_failed": false,
|
| 43 |
-
"zero_accuracy_warning":
|
| 44 |
"metrics": {
|
| 45 |
-
"exact_match": 0.
|
| 46 |
-
"contains": 0.
|
| 47 |
-
"token_f1": 0.
|
| 48 |
-
"bleu": 0.
|
| 49 |
-
"rouge_l": 0.
|
| 50 |
}
|
| 51 |
}
|
| 52 |
}
|
|
|
|
| 9 |
6,
|
| 10 |
7,
|
| 11 |
8,
|
|
|
|
| 12 |
10,
|
| 13 |
11,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
18,
|
| 15 |
+
19
|
|
|
|
|
|
|
| 16 |
],
|
| 17 |
"n_layers_original": 36,
|
| 18 |
+
"n_layers_remaining": 26,
|
| 19 |
"params_before_M": 4304.9,
|
| 20 |
+
"params_after_M": 3534.2,
|
| 21 |
+
"param_reduction_pct": 17.9,
|
| 22 |
"benchmarks": {
|
| 23 |
"vqav2": {
|
| 24 |
+
"accuracy": 0.06,
|
| 25 |
+
"avg_latency_s": 0.5029,
|
| 26 |
+
"peak_memory_mb": 15235.1,
|
| 27 |
+
"avg_memory_mb": 15230.8,
|
| 28 |
+
"throughput_sps": 1.987,
|
| 29 |
+
"avg_power_w": 236.7,
|
| 30 |
+
"avg_gpu_util_pct": 69.1,
|
| 31 |
"n_samples": 50,
|
| 32 |
"n_evaluated": 50,
|
| 33 |
"n_skipped": 0,
|
| 34 |
"all_failed": false,
|
| 35 |
+
"zero_accuracy_warning": false,
|
| 36 |
"metrics": {
|
| 37 |
+
"exact_match": 0.06,
|
| 38 |
+
"contains": 0.42,
|
| 39 |
+
"token_f1": 0.1762,
|
| 40 |
+
"bleu": 0.135,
|
| 41 |
+
"rouge_l": 0.1762
|
| 42 |
}
|
| 43 |
}
|
| 44 |
}
|