Azaz666 committed on
Commit
563aa1a
·
verified ·
1 Parent(s): 6825dd9

Layer-pruned Ovis2-4B (PPL-based (Shortened LLaMA), removed 10 layers, 17.9% reduction)

Browse files
Files changed (3) hide show
  1. config.json +1 -1
  2. model.safetensors +2 -2
  3. pruning_info.json +17 -25
config.json CHANGED
@@ -111,7 +111,7 @@
111
  },
112
  "model_type": "ovis",
113
  "multimodal_max_length": 32768,
114
- "num_hidden_layers": 18,
115
  "transformers_version": "5.3.0",
116
  "visual_tokenizer_config": {
117
  "_attn_implementation_autoset": true,
 
111
  },
112
  "model_type": "ovis",
113
  "multimodal_max_length": 32768,
114
+ "num_hidden_layers": 26,
115
  "transformers_version": "5.3.0",
116
  "visual_tokenizer_config": {
117
  "_attn_implementation_autoset": true,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:327f7dc09cc11d1971ae7aa31aa6de5f27d9cb490a293b283409f5c989adaf05
3
- size 6457490620
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3a5829c5ba683148706c7226f8dd1b8abfb132bc572cf436f77430e4843d5bb
3
+ size 7690733916
pruning_info.json CHANGED
@@ -9,44 +9,36 @@
9
  6,
10
  7,
11
  8,
12
- 9,
13
  10,
14
  11,
15
- 12,
16
- 13,
17
- 14,
18
- 15,
19
- 16,
20
  18,
21
- 19,
22
- 20,
23
- 22
24
  ],
25
  "n_layers_original": 36,
26
- "n_layers_remaining": 18,
27
  "params_before_M": 4304.9,
28
- "params_after_M": 2917.6,
29
- "param_reduction_pct": 32.2,
30
  "benchmarks": {
31
  "vqav2": {
32
- "accuracy": 0.0,
33
- "avg_latency_s": 0.6058,
34
- "peak_memory_mb": 15225.1,
35
- "avg_memory_mb": 15222.8,
36
- "throughput_sps": 1.65,
37
- "avg_power_w": 226.7,
38
- "avg_gpu_util_pct": 64.3,
39
  "n_samples": 50,
40
  "n_evaluated": 50,
41
  "n_skipped": 0,
42
  "all_failed": false,
43
- "zero_accuracy_warning": true,
44
  "metrics": {
45
- "exact_match": 0.0,
46
- "contains": 0.1,
47
- "token_f1": 0.0022,
48
- "bleu": 0.0012,
49
- "rouge_l": 0.0022
50
  }
51
  }
52
  }
 
9
  6,
10
  7,
11
  8,
 
12
  10,
13
  11,
 
 
 
 
 
14
  18,
15
+ 19
 
 
16
  ],
17
  "n_layers_original": 36,
18
+ "n_layers_remaining": 26,
19
  "params_before_M": 4304.9,
20
+ "params_after_M": 3534.2,
21
+ "param_reduction_pct": 17.9,
22
  "benchmarks": {
23
  "vqav2": {
24
+ "accuracy": 0.06,
25
+ "avg_latency_s": 0.5029,
26
+ "peak_memory_mb": 15235.1,
27
+ "avg_memory_mb": 15230.8,
28
+ "throughput_sps": 1.987,
29
+ "avg_power_w": 236.7,
30
+ "avg_gpu_util_pct": 69.1,
31
  "n_samples": 50,
32
  "n_evaluated": 50,
33
  "n_skipped": 0,
34
  "all_failed": false,
35
+ "zero_accuracy_warning": false,
36
  "metrics": {
37
+ "exact_match": 0.06,
38
+ "contains": 0.42,
39
+ "token_f1": 0.1762,
40
+ "bleu": 0.135,
41
+ "rouge_l": 0.1762
42
  }
43
  }
44
  }