krishnateja95 commited on
Commit
f2c97c3
·
verified ·
1 Parent(s): 408912e

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. metadata.json +1148 -0
  2. model.layers.0.mlp.down_proj/FP8_Block.pth +3 -0
  3. model.layers.0.mlp.down_proj/FP8_Dynamic.pth +3 -0
  4. model.layers.0.mlp.down_proj/NVFP4.pth +3 -0
  5. model.layers.0.mlp.gate_proj/FP8_Block.pth +3 -0
  6. model.layers.0.mlp.gate_proj/FP8_Dynamic.pth +3 -0
  7. model.layers.0.mlp.gate_proj/NVFP4.pth +3 -0
  8. model.layers.0.mlp.up_proj/FP8_Block.pth +3 -0
  9. model.layers.0.mlp.up_proj/FP8_Dynamic.pth +3 -0
  10. model.layers.0.mlp.up_proj/NVFP4.pth +3 -0
  11. model.layers.0.self_attn.k_proj/FP8_Block.pth +3 -0
  12. model.layers.0.self_attn.k_proj/FP8_Dynamic.pth +3 -0
  13. model.layers.0.self_attn.k_proj/NVFP4.pth +3 -0
  14. model.layers.0.self_attn.o_proj/FP8_Block.pth +3 -0
  15. model.layers.0.self_attn.o_proj/FP8_Dynamic.pth +3 -0
  16. model.layers.0.self_attn.o_proj/NVFP4.pth +3 -0
  17. model.layers.0.self_attn.q_proj/FP8_Block.pth +3 -0
  18. model.layers.0.self_attn.q_proj/FP8_Dynamic.pth +3 -0
  19. model.layers.0.self_attn.q_proj/NVFP4.pth +3 -0
  20. model.layers.0.self_attn.v_proj/FP8_Block.pth +3 -0
  21. model.layers.0.self_attn.v_proj/FP8_Dynamic.pth +3 -0
  22. model.layers.0.self_attn.v_proj/NVFP4.pth +3 -0
  23. model.layers.1.mlp.down_proj/FP8_Block.pth +3 -0
  24. model.layers.1.mlp.down_proj/FP8_Dynamic.pth +3 -0
  25. model.layers.1.mlp.down_proj/NVFP4.pth +3 -0
  26. model.layers.1.mlp.gate_proj/FP8_Block.pth +3 -0
  27. model.layers.1.mlp.gate_proj/FP8_Dynamic.pth +3 -0
  28. model.layers.1.mlp.gate_proj/NVFP4.pth +3 -0
  29. model.layers.1.mlp.up_proj/FP8_Block.pth +3 -0
  30. model.layers.1.mlp.up_proj/FP8_Dynamic.pth +3 -0
  31. model.layers.1.mlp.up_proj/NVFP4.pth +3 -0
  32. model.layers.1.self_attn.k_proj/FP8_Block.pth +3 -0
  33. model.layers.1.self_attn.k_proj/FP8_Dynamic.pth +3 -0
  34. model.layers.1.self_attn.k_proj/NVFP4.pth +3 -0
  35. model.layers.1.self_attn.o_proj/FP8_Block.pth +3 -0
  36. model.layers.1.self_attn.o_proj/FP8_Dynamic.pth +3 -0
  37. model.layers.1.self_attn.o_proj/NVFP4.pth +3 -0
  38. model.layers.1.self_attn.q_proj/FP8_Block.pth +3 -0
  39. model.layers.1.self_attn.q_proj/FP8_Dynamic.pth +3 -0
  40. model.layers.1.self_attn.q_proj/NVFP4.pth +3 -0
  41. model.layers.1.self_attn.v_proj/FP8_Block.pth +3 -0
  42. model.layers.1.self_attn.v_proj/FP8_Dynamic.pth +3 -0
  43. model.layers.1.self_attn.v_proj/NVFP4.pth +3 -0
  44. model.layers.10.mlp.down_proj/FP8_Block.pth +3 -0
  45. model.layers.10.mlp.down_proj/FP8_Dynamic.pth +3 -0
  46. model.layers.10.mlp.down_proj/NVFP4.pth +3 -0
  47. model.layers.10.mlp.gate_proj/FP8_Block.pth +3 -0
  48. model.layers.10.mlp.gate_proj/FP8_Dynamic.pth +3 -0
  49. model.layers.10.mlp.gate_proj/NVFP4.pth +3 -0
  50. model.layers.10.mlp.up_proj/FP8_Block.pth +3 -0
metadata.json ADDED
@@ -0,0 +1,1148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "llama",
3
+ "num_layers": 224,
4
+ "precisions": [
5
+ "FP8_Dynamic",
6
+ "FP8_Block",
7
+ "NVFP4"
8
+ ],
9
+ "precision_configs": {
10
+ "FP8_Dynamic": {
11
+ "effective_bits": 8.0,
12
+ "num_bits": 8,
13
+ "strategy": "channel"
14
+ },
15
+ "FP8_Block": {
16
+ "effective_bits": 8.0,
17
+ "num_bits": 8,
18
+ "strategy": "block"
19
+ },
20
+ "NVFP4": {
21
+ "effective_bits": 4.0,
22
+ "num_bits": 4,
23
+ "strategy": "tensor_group"
24
+ }
25
+ },
26
+ "layer_paths": {
27
+ "model.layers.0.self_attn.q_proj": {
28
+ "FP8_Dynamic": "quantized_layers/model.layers.0.self_attn.q_proj/FP8_Dynamic.pth",
29
+ "FP8_Block": "quantized_layers/model.layers.0.self_attn.q_proj/FP8_Block.pth",
30
+ "NVFP4": "quantized_layers/model.layers.0.self_attn.q_proj/NVFP4.pth"
31
+ },
32
+ "model.layers.0.self_attn.k_proj": {
33
+ "FP8_Dynamic": "quantized_layers/model.layers.0.self_attn.k_proj/FP8_Dynamic.pth",
34
+ "FP8_Block": "quantized_layers/model.layers.0.self_attn.k_proj/FP8_Block.pth",
35
+ "NVFP4": "quantized_layers/model.layers.0.self_attn.k_proj/NVFP4.pth"
36
+ },
37
+ "model.layers.0.self_attn.v_proj": {
38
+ "FP8_Dynamic": "quantized_layers/model.layers.0.self_attn.v_proj/FP8_Dynamic.pth",
39
+ "FP8_Block": "quantized_layers/model.layers.0.self_attn.v_proj/FP8_Block.pth",
40
+ "NVFP4": "quantized_layers/model.layers.0.self_attn.v_proj/NVFP4.pth"
41
+ },
42
+ "model.layers.0.self_attn.o_proj": {
43
+ "FP8_Dynamic": "quantized_layers/model.layers.0.self_attn.o_proj/FP8_Dynamic.pth",
44
+ "FP8_Block": "quantized_layers/model.layers.0.self_attn.o_proj/FP8_Block.pth",
45
+ "NVFP4": "quantized_layers/model.layers.0.self_attn.o_proj/NVFP4.pth"
46
+ },
47
+ "model.layers.0.mlp.gate_proj": {
48
+ "FP8_Dynamic": "quantized_layers/model.layers.0.mlp.gate_proj/FP8_Dynamic.pth",
49
+ "FP8_Block": "quantized_layers/model.layers.0.mlp.gate_proj/FP8_Block.pth",
50
+ "NVFP4": "quantized_layers/model.layers.0.mlp.gate_proj/NVFP4.pth"
51
+ },
52
+ "model.layers.0.mlp.up_proj": {
53
+ "FP8_Dynamic": "quantized_layers/model.layers.0.mlp.up_proj/FP8_Dynamic.pth",
54
+ "FP8_Block": "quantized_layers/model.layers.0.mlp.up_proj/FP8_Block.pth",
55
+ "NVFP4": "quantized_layers/model.layers.0.mlp.up_proj/NVFP4.pth"
56
+ },
57
+ "model.layers.0.mlp.down_proj": {
58
+ "FP8_Dynamic": "quantized_layers/model.layers.0.mlp.down_proj/FP8_Dynamic.pth",
59
+ "FP8_Block": "quantized_layers/model.layers.0.mlp.down_proj/FP8_Block.pth",
60
+ "NVFP4": "quantized_layers/model.layers.0.mlp.down_proj/NVFP4.pth"
61
+ },
62
+ "model.layers.1.self_attn.q_proj": {
63
+ "FP8_Dynamic": "quantized_layers/model.layers.1.self_attn.q_proj/FP8_Dynamic.pth",
64
+ "FP8_Block": "quantized_layers/model.layers.1.self_attn.q_proj/FP8_Block.pth",
65
+ "NVFP4": "quantized_layers/model.layers.1.self_attn.q_proj/NVFP4.pth"
66
+ },
67
+ "model.layers.1.self_attn.k_proj": {
68
+ "FP8_Dynamic": "quantized_layers/model.layers.1.self_attn.k_proj/FP8_Dynamic.pth",
69
+ "FP8_Block": "quantized_layers/model.layers.1.self_attn.k_proj/FP8_Block.pth",
70
+ "NVFP4": "quantized_layers/model.layers.1.self_attn.k_proj/NVFP4.pth"
71
+ },
72
+ "model.layers.1.self_attn.v_proj": {
73
+ "FP8_Dynamic": "quantized_layers/model.layers.1.self_attn.v_proj/FP8_Dynamic.pth",
74
+ "FP8_Block": "quantized_layers/model.layers.1.self_attn.v_proj/FP8_Block.pth",
75
+ "NVFP4": "quantized_layers/model.layers.1.self_attn.v_proj/NVFP4.pth"
76
+ },
77
+ "model.layers.1.self_attn.o_proj": {
78
+ "FP8_Dynamic": "quantized_layers/model.layers.1.self_attn.o_proj/FP8_Dynamic.pth",
79
+ "FP8_Block": "quantized_layers/model.layers.1.self_attn.o_proj/FP8_Block.pth",
80
+ "NVFP4": "quantized_layers/model.layers.1.self_attn.o_proj/NVFP4.pth"
81
+ },
82
+ "model.layers.1.mlp.gate_proj": {
83
+ "FP8_Dynamic": "quantized_layers/model.layers.1.mlp.gate_proj/FP8_Dynamic.pth",
84
+ "FP8_Block": "quantized_layers/model.layers.1.mlp.gate_proj/FP8_Block.pth",
85
+ "NVFP4": "quantized_layers/model.layers.1.mlp.gate_proj/NVFP4.pth"
86
+ },
87
+ "model.layers.1.mlp.up_proj": {
88
+ "FP8_Dynamic": "quantized_layers/model.layers.1.mlp.up_proj/FP8_Dynamic.pth",
89
+ "FP8_Block": "quantized_layers/model.layers.1.mlp.up_proj/FP8_Block.pth",
90
+ "NVFP4": "quantized_layers/model.layers.1.mlp.up_proj/NVFP4.pth"
91
+ },
92
+ "model.layers.1.mlp.down_proj": {
93
+ "FP8_Dynamic": "quantized_layers/model.layers.1.mlp.down_proj/FP8_Dynamic.pth",
94
+ "FP8_Block": "quantized_layers/model.layers.1.mlp.down_proj/FP8_Block.pth",
95
+ "NVFP4": "quantized_layers/model.layers.1.mlp.down_proj/NVFP4.pth"
96
+ },
97
+ "model.layers.2.self_attn.q_proj": {
98
+ "FP8_Dynamic": "quantized_layers/model.layers.2.self_attn.q_proj/FP8_Dynamic.pth",
99
+ "FP8_Block": "quantized_layers/model.layers.2.self_attn.q_proj/FP8_Block.pth",
100
+ "NVFP4": "quantized_layers/model.layers.2.self_attn.q_proj/NVFP4.pth"
101
+ },
102
+ "model.layers.2.self_attn.k_proj": {
103
+ "FP8_Dynamic": "quantized_layers/model.layers.2.self_attn.k_proj/FP8_Dynamic.pth",
104
+ "FP8_Block": "quantized_layers/model.layers.2.self_attn.k_proj/FP8_Block.pth",
105
+ "NVFP4": "quantized_layers/model.layers.2.self_attn.k_proj/NVFP4.pth"
106
+ },
107
+ "model.layers.2.self_attn.v_proj": {
108
+ "FP8_Dynamic": "quantized_layers/model.layers.2.self_attn.v_proj/FP8_Dynamic.pth",
109
+ "FP8_Block": "quantized_layers/model.layers.2.self_attn.v_proj/FP8_Block.pth",
110
+ "NVFP4": "quantized_layers/model.layers.2.self_attn.v_proj/NVFP4.pth"
111
+ },
112
+ "model.layers.2.self_attn.o_proj": {
113
+ "FP8_Dynamic": "quantized_layers/model.layers.2.self_attn.o_proj/FP8_Dynamic.pth",
114
+ "FP8_Block": "quantized_layers/model.layers.2.self_attn.o_proj/FP8_Block.pth",
115
+ "NVFP4": "quantized_layers/model.layers.2.self_attn.o_proj/NVFP4.pth"
116
+ },
117
+ "model.layers.2.mlp.gate_proj": {
118
+ "FP8_Dynamic": "quantized_layers/model.layers.2.mlp.gate_proj/FP8_Dynamic.pth",
119
+ "FP8_Block": "quantized_layers/model.layers.2.mlp.gate_proj/FP8_Block.pth",
120
+ "NVFP4": "quantized_layers/model.layers.2.mlp.gate_proj/NVFP4.pth"
121
+ },
122
+ "model.layers.2.mlp.up_proj": {
123
+ "FP8_Dynamic": "quantized_layers/model.layers.2.mlp.up_proj/FP8_Dynamic.pth",
124
+ "FP8_Block": "quantized_layers/model.layers.2.mlp.up_proj/FP8_Block.pth",
125
+ "NVFP4": "quantized_layers/model.layers.2.mlp.up_proj/NVFP4.pth"
126
+ },
127
+ "model.layers.2.mlp.down_proj": {
128
+ "FP8_Dynamic": "quantized_layers/model.layers.2.mlp.down_proj/FP8_Dynamic.pth",
129
+ "FP8_Block": "quantized_layers/model.layers.2.mlp.down_proj/FP8_Block.pth",
130
+ "NVFP4": "quantized_layers/model.layers.2.mlp.down_proj/NVFP4.pth"
131
+ },
132
+ "model.layers.3.self_attn.q_proj": {
133
+ "FP8_Dynamic": "quantized_layers/model.layers.3.self_attn.q_proj/FP8_Dynamic.pth",
134
+ "FP8_Block": "quantized_layers/model.layers.3.self_attn.q_proj/FP8_Block.pth",
135
+ "NVFP4": "quantized_layers/model.layers.3.self_attn.q_proj/NVFP4.pth"
136
+ },
137
+ "model.layers.3.self_attn.k_proj": {
138
+ "FP8_Dynamic": "quantized_layers/model.layers.3.self_attn.k_proj/FP8_Dynamic.pth",
139
+ "FP8_Block": "quantized_layers/model.layers.3.self_attn.k_proj/FP8_Block.pth",
140
+ "NVFP4": "quantized_layers/model.layers.3.self_attn.k_proj/NVFP4.pth"
141
+ },
142
+ "model.layers.3.self_attn.v_proj": {
143
+ "FP8_Dynamic": "quantized_layers/model.layers.3.self_attn.v_proj/FP8_Dynamic.pth",
144
+ "FP8_Block": "quantized_layers/model.layers.3.self_attn.v_proj/FP8_Block.pth",
145
+ "NVFP4": "quantized_layers/model.layers.3.self_attn.v_proj/NVFP4.pth"
146
+ },
147
+ "model.layers.3.self_attn.o_proj": {
148
+ "FP8_Dynamic": "quantized_layers/model.layers.3.self_attn.o_proj/FP8_Dynamic.pth",
149
+ "FP8_Block": "quantized_layers/model.layers.3.self_attn.o_proj/FP8_Block.pth",
150
+ "NVFP4": "quantized_layers/model.layers.3.self_attn.o_proj/NVFP4.pth"
151
+ },
152
+ "model.layers.3.mlp.gate_proj": {
153
+ "FP8_Dynamic": "quantized_layers/model.layers.3.mlp.gate_proj/FP8_Dynamic.pth",
154
+ "FP8_Block": "quantized_layers/model.layers.3.mlp.gate_proj/FP8_Block.pth",
155
+ "NVFP4": "quantized_layers/model.layers.3.mlp.gate_proj/NVFP4.pth"
156
+ },
157
+ "model.layers.3.mlp.up_proj": {
158
+ "FP8_Dynamic": "quantized_layers/model.layers.3.mlp.up_proj/FP8_Dynamic.pth",
159
+ "FP8_Block": "quantized_layers/model.layers.3.mlp.up_proj/FP8_Block.pth",
160
+ "NVFP4": "quantized_layers/model.layers.3.mlp.up_proj/NVFP4.pth"
161
+ },
162
+ "model.layers.3.mlp.down_proj": {
163
+ "FP8_Dynamic": "quantized_layers/model.layers.3.mlp.down_proj/FP8_Dynamic.pth",
164
+ "FP8_Block": "quantized_layers/model.layers.3.mlp.down_proj/FP8_Block.pth",
165
+ "NVFP4": "quantized_layers/model.layers.3.mlp.down_proj/NVFP4.pth"
166
+ },
167
+ "model.layers.4.self_attn.q_proj": {
168
+ "FP8_Dynamic": "quantized_layers/model.layers.4.self_attn.q_proj/FP8_Dynamic.pth",
169
+ "FP8_Block": "quantized_layers/model.layers.4.self_attn.q_proj/FP8_Block.pth",
170
+ "NVFP4": "quantized_layers/model.layers.4.self_attn.q_proj/NVFP4.pth"
171
+ },
172
+ "model.layers.4.self_attn.k_proj": {
173
+ "FP8_Dynamic": "quantized_layers/model.layers.4.self_attn.k_proj/FP8_Dynamic.pth",
174
+ "FP8_Block": "quantized_layers/model.layers.4.self_attn.k_proj/FP8_Block.pth",
175
+ "NVFP4": "quantized_layers/model.layers.4.self_attn.k_proj/NVFP4.pth"
176
+ },
177
+ "model.layers.4.self_attn.v_proj": {
178
+ "FP8_Dynamic": "quantized_layers/model.layers.4.self_attn.v_proj/FP8_Dynamic.pth",
179
+ "FP8_Block": "quantized_layers/model.layers.4.self_attn.v_proj/FP8_Block.pth",
180
+ "NVFP4": "quantized_layers/model.layers.4.self_attn.v_proj/NVFP4.pth"
181
+ },
182
+ "model.layers.4.self_attn.o_proj": {
183
+ "FP8_Dynamic": "quantized_layers/model.layers.4.self_attn.o_proj/FP8_Dynamic.pth",
184
+ "FP8_Block": "quantized_layers/model.layers.4.self_attn.o_proj/FP8_Block.pth",
185
+ "NVFP4": "quantized_layers/model.layers.4.self_attn.o_proj/NVFP4.pth"
186
+ },
187
+ "model.layers.4.mlp.gate_proj": {
188
+ "FP8_Dynamic": "quantized_layers/model.layers.4.mlp.gate_proj/FP8_Dynamic.pth",
189
+ "FP8_Block": "quantized_layers/model.layers.4.mlp.gate_proj/FP8_Block.pth",
190
+ "NVFP4": "quantized_layers/model.layers.4.mlp.gate_proj/NVFP4.pth"
191
+ },
192
+ "model.layers.4.mlp.up_proj": {
193
+ "FP8_Dynamic": "quantized_layers/model.layers.4.mlp.up_proj/FP8_Dynamic.pth",
194
+ "FP8_Block": "quantized_layers/model.layers.4.mlp.up_proj/FP8_Block.pth",
195
+ "NVFP4": "quantized_layers/model.layers.4.mlp.up_proj/NVFP4.pth"
196
+ },
197
+ "model.layers.4.mlp.down_proj": {
198
+ "FP8_Dynamic": "quantized_layers/model.layers.4.mlp.down_proj/FP8_Dynamic.pth",
199
+ "FP8_Block": "quantized_layers/model.layers.4.mlp.down_proj/FP8_Block.pth",
200
+ "NVFP4": "quantized_layers/model.layers.4.mlp.down_proj/NVFP4.pth"
201
+ },
202
+ "model.layers.5.self_attn.q_proj": {
203
+ "FP8_Dynamic": "quantized_layers/model.layers.5.self_attn.q_proj/FP8_Dynamic.pth",
204
+ "FP8_Block": "quantized_layers/model.layers.5.self_attn.q_proj/FP8_Block.pth",
205
+ "NVFP4": "quantized_layers/model.layers.5.self_attn.q_proj/NVFP4.pth"
206
+ },
207
+ "model.layers.5.self_attn.k_proj": {
208
+ "FP8_Dynamic": "quantized_layers/model.layers.5.self_attn.k_proj/FP8_Dynamic.pth",
209
+ "FP8_Block": "quantized_layers/model.layers.5.self_attn.k_proj/FP8_Block.pth",
210
+ "NVFP4": "quantized_layers/model.layers.5.self_attn.k_proj/NVFP4.pth"
211
+ },
212
+ "model.layers.5.self_attn.v_proj": {
213
+ "FP8_Dynamic": "quantized_layers/model.layers.5.self_attn.v_proj/FP8_Dynamic.pth",
214
+ "FP8_Block": "quantized_layers/model.layers.5.self_attn.v_proj/FP8_Block.pth",
215
+ "NVFP4": "quantized_layers/model.layers.5.self_attn.v_proj/NVFP4.pth"
216
+ },
217
+ "model.layers.5.self_attn.o_proj": {
218
+ "FP8_Dynamic": "quantized_layers/model.layers.5.self_attn.o_proj/FP8_Dynamic.pth",
219
+ "FP8_Block": "quantized_layers/model.layers.5.self_attn.o_proj/FP8_Block.pth",
220
+ "NVFP4": "quantized_layers/model.layers.5.self_attn.o_proj/NVFP4.pth"
221
+ },
222
+ "model.layers.5.mlp.gate_proj": {
223
+ "FP8_Dynamic": "quantized_layers/model.layers.5.mlp.gate_proj/FP8_Dynamic.pth",
224
+ "FP8_Block": "quantized_layers/model.layers.5.mlp.gate_proj/FP8_Block.pth",
225
+ "NVFP4": "quantized_layers/model.layers.5.mlp.gate_proj/NVFP4.pth"
226
+ },
227
+ "model.layers.5.mlp.up_proj": {
228
+ "FP8_Dynamic": "quantized_layers/model.layers.5.mlp.up_proj/FP8_Dynamic.pth",
229
+ "FP8_Block": "quantized_layers/model.layers.5.mlp.up_proj/FP8_Block.pth",
230
+ "NVFP4": "quantized_layers/model.layers.5.mlp.up_proj/NVFP4.pth"
231
+ },
232
+ "model.layers.5.mlp.down_proj": {
233
+ "FP8_Dynamic": "quantized_layers/model.layers.5.mlp.down_proj/FP8_Dynamic.pth",
234
+ "FP8_Block": "quantized_layers/model.layers.5.mlp.down_proj/FP8_Block.pth",
235
+ "NVFP4": "quantized_layers/model.layers.5.mlp.down_proj/NVFP4.pth"
236
+ },
237
+ "model.layers.6.self_attn.q_proj": {
238
+ "FP8_Dynamic": "quantized_layers/model.layers.6.self_attn.q_proj/FP8_Dynamic.pth",
239
+ "FP8_Block": "quantized_layers/model.layers.6.self_attn.q_proj/FP8_Block.pth",
240
+ "NVFP4": "quantized_layers/model.layers.6.self_attn.q_proj/NVFP4.pth"
241
+ },
242
+ "model.layers.6.self_attn.k_proj": {
243
+ "FP8_Dynamic": "quantized_layers/model.layers.6.self_attn.k_proj/FP8_Dynamic.pth",
244
+ "FP8_Block": "quantized_layers/model.layers.6.self_attn.k_proj/FP8_Block.pth",
245
+ "NVFP4": "quantized_layers/model.layers.6.self_attn.k_proj/NVFP4.pth"
246
+ },
247
+ "model.layers.6.self_attn.v_proj": {
248
+ "FP8_Dynamic": "quantized_layers/model.layers.6.self_attn.v_proj/FP8_Dynamic.pth",
249
+ "FP8_Block": "quantized_layers/model.layers.6.self_attn.v_proj/FP8_Block.pth",
250
+ "NVFP4": "quantized_layers/model.layers.6.self_attn.v_proj/NVFP4.pth"
251
+ },
252
+ "model.layers.6.self_attn.o_proj": {
253
+ "FP8_Dynamic": "quantized_layers/model.layers.6.self_attn.o_proj/FP8_Dynamic.pth",
254
+ "FP8_Block": "quantized_layers/model.layers.6.self_attn.o_proj/FP8_Block.pth",
255
+ "NVFP4": "quantized_layers/model.layers.6.self_attn.o_proj/NVFP4.pth"
256
+ },
257
+ "model.layers.6.mlp.gate_proj": {
258
+ "FP8_Dynamic": "quantized_layers/model.layers.6.mlp.gate_proj/FP8_Dynamic.pth",
259
+ "FP8_Block": "quantized_layers/model.layers.6.mlp.gate_proj/FP8_Block.pth",
260
+ "NVFP4": "quantized_layers/model.layers.6.mlp.gate_proj/NVFP4.pth"
261
+ },
262
+ "model.layers.6.mlp.up_proj": {
263
+ "FP8_Dynamic": "quantized_layers/model.layers.6.mlp.up_proj/FP8_Dynamic.pth",
264
+ "FP8_Block": "quantized_layers/model.layers.6.mlp.up_proj/FP8_Block.pth",
265
+ "NVFP4": "quantized_layers/model.layers.6.mlp.up_proj/NVFP4.pth"
266
+ },
267
+ "model.layers.6.mlp.down_proj": {
268
+ "FP8_Dynamic": "quantized_layers/model.layers.6.mlp.down_proj/FP8_Dynamic.pth",
269
+ "FP8_Block": "quantized_layers/model.layers.6.mlp.down_proj/FP8_Block.pth",
270
+ "NVFP4": "quantized_layers/model.layers.6.mlp.down_proj/NVFP4.pth"
271
+ },
272
+ "model.layers.7.self_attn.q_proj": {
273
+ "FP8_Dynamic": "quantized_layers/model.layers.7.self_attn.q_proj/FP8_Dynamic.pth",
274
+ "FP8_Block": "quantized_layers/model.layers.7.self_attn.q_proj/FP8_Block.pth",
275
+ "NVFP4": "quantized_layers/model.layers.7.self_attn.q_proj/NVFP4.pth"
276
+ },
277
+ "model.layers.7.self_attn.k_proj": {
278
+ "FP8_Dynamic": "quantized_layers/model.layers.7.self_attn.k_proj/FP8_Dynamic.pth",
279
+ "FP8_Block": "quantized_layers/model.layers.7.self_attn.k_proj/FP8_Block.pth",
280
+ "NVFP4": "quantized_layers/model.layers.7.self_attn.k_proj/NVFP4.pth"
281
+ },
282
+ "model.layers.7.self_attn.v_proj": {
283
+ "FP8_Dynamic": "quantized_layers/model.layers.7.self_attn.v_proj/FP8_Dynamic.pth",
284
+ "FP8_Block": "quantized_layers/model.layers.7.self_attn.v_proj/FP8_Block.pth",
285
+ "NVFP4": "quantized_layers/model.layers.7.self_attn.v_proj/NVFP4.pth"
286
+ },
287
+ "model.layers.7.self_attn.o_proj": {
288
+ "FP8_Dynamic": "quantized_layers/model.layers.7.self_attn.o_proj/FP8_Dynamic.pth",
289
+ "FP8_Block": "quantized_layers/model.layers.7.self_attn.o_proj/FP8_Block.pth",
290
+ "NVFP4": "quantized_layers/model.layers.7.self_attn.o_proj/NVFP4.pth"
291
+ },
292
+ "model.layers.7.mlp.gate_proj": {
293
+ "FP8_Dynamic": "quantized_layers/model.layers.7.mlp.gate_proj/FP8_Dynamic.pth",
294
+ "FP8_Block": "quantized_layers/model.layers.7.mlp.gate_proj/FP8_Block.pth",
295
+ "NVFP4": "quantized_layers/model.layers.7.mlp.gate_proj/NVFP4.pth"
296
+ },
297
+ "model.layers.7.mlp.up_proj": {
298
+ "FP8_Dynamic": "quantized_layers/model.layers.7.mlp.up_proj/FP8_Dynamic.pth",
299
+ "FP8_Block": "quantized_layers/model.layers.7.mlp.up_proj/FP8_Block.pth",
300
+ "NVFP4": "quantized_layers/model.layers.7.mlp.up_proj/NVFP4.pth"
301
+ },
302
+ "model.layers.7.mlp.down_proj": {
303
+ "FP8_Dynamic": "quantized_layers/model.layers.7.mlp.down_proj/FP8_Dynamic.pth",
304
+ "FP8_Block": "quantized_layers/model.layers.7.mlp.down_proj/FP8_Block.pth",
305
+ "NVFP4": "quantized_layers/model.layers.7.mlp.down_proj/NVFP4.pth"
306
+ },
307
+ "model.layers.8.self_attn.q_proj": {
308
+ "FP8_Dynamic": "quantized_layers/model.layers.8.self_attn.q_proj/FP8_Dynamic.pth",
309
+ "FP8_Block": "quantized_layers/model.layers.8.self_attn.q_proj/FP8_Block.pth",
310
+ "NVFP4": "quantized_layers/model.layers.8.self_attn.q_proj/NVFP4.pth"
311
+ },
312
+ "model.layers.8.self_attn.k_proj": {
313
+ "FP8_Dynamic": "quantized_layers/model.layers.8.self_attn.k_proj/FP8_Dynamic.pth",
314
+ "FP8_Block": "quantized_layers/model.layers.8.self_attn.k_proj/FP8_Block.pth",
315
+ "NVFP4": "quantized_layers/model.layers.8.self_attn.k_proj/NVFP4.pth"
316
+ },
317
+ "model.layers.8.self_attn.v_proj": {
318
+ "FP8_Dynamic": "quantized_layers/model.layers.8.self_attn.v_proj/FP8_Dynamic.pth",
319
+ "FP8_Block": "quantized_layers/model.layers.8.self_attn.v_proj/FP8_Block.pth",
320
+ "NVFP4": "quantized_layers/model.layers.8.self_attn.v_proj/NVFP4.pth"
321
+ },
322
+ "model.layers.8.self_attn.o_proj": {
323
+ "FP8_Dynamic": "quantized_layers/model.layers.8.self_attn.o_proj/FP8_Dynamic.pth",
324
+ "FP8_Block": "quantized_layers/model.layers.8.self_attn.o_proj/FP8_Block.pth",
325
+ "NVFP4": "quantized_layers/model.layers.8.self_attn.o_proj/NVFP4.pth"
326
+ },
327
+ "model.layers.8.mlp.gate_proj": {
328
+ "FP8_Dynamic": "quantized_layers/model.layers.8.mlp.gate_proj/FP8_Dynamic.pth",
329
+ "FP8_Block": "quantized_layers/model.layers.8.mlp.gate_proj/FP8_Block.pth",
330
+ "NVFP4": "quantized_layers/model.layers.8.mlp.gate_proj/NVFP4.pth"
331
+ },
332
+ "model.layers.8.mlp.up_proj": {
333
+ "FP8_Dynamic": "quantized_layers/model.layers.8.mlp.up_proj/FP8_Dynamic.pth",
334
+ "FP8_Block": "quantized_layers/model.layers.8.mlp.up_proj/FP8_Block.pth",
335
+ "NVFP4": "quantized_layers/model.layers.8.mlp.up_proj/NVFP4.pth"
336
+ },
337
+ "model.layers.8.mlp.down_proj": {
338
+ "FP8_Dynamic": "quantized_layers/model.layers.8.mlp.down_proj/FP8_Dynamic.pth",
339
+ "FP8_Block": "quantized_layers/model.layers.8.mlp.down_proj/FP8_Block.pth",
340
+ "NVFP4": "quantized_layers/model.layers.8.mlp.down_proj/NVFP4.pth"
341
+ },
342
+ "model.layers.9.self_attn.q_proj": {
343
+ "FP8_Dynamic": "quantized_layers/model.layers.9.self_attn.q_proj/FP8_Dynamic.pth",
344
+ "FP8_Block": "quantized_layers/model.layers.9.self_attn.q_proj/FP8_Block.pth",
345
+ "NVFP4": "quantized_layers/model.layers.9.self_attn.q_proj/NVFP4.pth"
346
+ },
347
+ "model.layers.9.self_attn.k_proj": {
348
+ "FP8_Dynamic": "quantized_layers/model.layers.9.self_attn.k_proj/FP8_Dynamic.pth",
349
+ "FP8_Block": "quantized_layers/model.layers.9.self_attn.k_proj/FP8_Block.pth",
350
+ "NVFP4": "quantized_layers/model.layers.9.self_attn.k_proj/NVFP4.pth"
351
+ },
352
+ "model.layers.9.self_attn.v_proj": {
353
+ "FP8_Dynamic": "quantized_layers/model.layers.9.self_attn.v_proj/FP8_Dynamic.pth",
354
+ "FP8_Block": "quantized_layers/model.layers.9.self_attn.v_proj/FP8_Block.pth",
355
+ "NVFP4": "quantized_layers/model.layers.9.self_attn.v_proj/NVFP4.pth"
356
+ },
357
+ "model.layers.9.self_attn.o_proj": {
358
+ "FP8_Dynamic": "quantized_layers/model.layers.9.self_attn.o_proj/FP8_Dynamic.pth",
359
+ "FP8_Block": "quantized_layers/model.layers.9.self_attn.o_proj/FP8_Block.pth",
360
+ "NVFP4": "quantized_layers/model.layers.9.self_attn.o_proj/NVFP4.pth"
361
+ },
362
+ "model.layers.9.mlp.gate_proj": {
363
+ "FP8_Dynamic": "quantized_layers/model.layers.9.mlp.gate_proj/FP8_Dynamic.pth",
364
+ "FP8_Block": "quantized_layers/model.layers.9.mlp.gate_proj/FP8_Block.pth",
365
+ "NVFP4": "quantized_layers/model.layers.9.mlp.gate_proj/NVFP4.pth"
366
+ },
367
+ "model.layers.9.mlp.up_proj": {
368
+ "FP8_Dynamic": "quantized_layers/model.layers.9.mlp.up_proj/FP8_Dynamic.pth",
369
+ "FP8_Block": "quantized_layers/model.layers.9.mlp.up_proj/FP8_Block.pth",
370
+ "NVFP4": "quantized_layers/model.layers.9.mlp.up_proj/NVFP4.pth"
371
+ },
372
+ "model.layers.9.mlp.down_proj": {
373
+ "FP8_Dynamic": "quantized_layers/model.layers.9.mlp.down_proj/FP8_Dynamic.pth",
374
+ "FP8_Block": "quantized_layers/model.layers.9.mlp.down_proj/FP8_Block.pth",
375
+ "NVFP4": "quantized_layers/model.layers.9.mlp.down_proj/NVFP4.pth"
376
+ },
377
+ "model.layers.10.self_attn.q_proj": {
378
+ "FP8_Dynamic": "quantized_layers/model.layers.10.self_attn.q_proj/FP8_Dynamic.pth",
379
+ "FP8_Block": "quantized_layers/model.layers.10.self_attn.q_proj/FP8_Block.pth",
380
+ "NVFP4": "quantized_layers/model.layers.10.self_attn.q_proj/NVFP4.pth"
381
+ },
382
+ "model.layers.10.self_attn.k_proj": {
383
+ "FP8_Dynamic": "quantized_layers/model.layers.10.self_attn.k_proj/FP8_Dynamic.pth",
384
+ "FP8_Block": "quantized_layers/model.layers.10.self_attn.k_proj/FP8_Block.pth",
385
+ "NVFP4": "quantized_layers/model.layers.10.self_attn.k_proj/NVFP4.pth"
386
+ },
387
+ "model.layers.10.self_attn.v_proj": {
388
+ "FP8_Dynamic": "quantized_layers/model.layers.10.self_attn.v_proj/FP8_Dynamic.pth",
389
+ "FP8_Block": "quantized_layers/model.layers.10.self_attn.v_proj/FP8_Block.pth",
390
+ "NVFP4": "quantized_layers/model.layers.10.self_attn.v_proj/NVFP4.pth"
391
+ },
392
+ "model.layers.10.self_attn.o_proj": {
393
+ "FP8_Dynamic": "quantized_layers/model.layers.10.self_attn.o_proj/FP8_Dynamic.pth",
394
+ "FP8_Block": "quantized_layers/model.layers.10.self_attn.o_proj/FP8_Block.pth",
395
+ "NVFP4": "quantized_layers/model.layers.10.self_attn.o_proj/NVFP4.pth"
396
+ },
397
+ "model.layers.10.mlp.gate_proj": {
398
+ "FP8_Dynamic": "quantized_layers/model.layers.10.mlp.gate_proj/FP8_Dynamic.pth",
399
+ "FP8_Block": "quantized_layers/model.layers.10.mlp.gate_proj/FP8_Block.pth",
400
+ "NVFP4": "quantized_layers/model.layers.10.mlp.gate_proj/NVFP4.pth"
401
+ },
402
+ "model.layers.10.mlp.up_proj": {
403
+ "FP8_Dynamic": "quantized_layers/model.layers.10.mlp.up_proj/FP8_Dynamic.pth",
404
+ "FP8_Block": "quantized_layers/model.layers.10.mlp.up_proj/FP8_Block.pth",
405
+ "NVFP4": "quantized_layers/model.layers.10.mlp.up_proj/NVFP4.pth"
406
+ },
407
+ "model.layers.10.mlp.down_proj": {
408
+ "FP8_Dynamic": "quantized_layers/model.layers.10.mlp.down_proj/FP8_Dynamic.pth",
409
+ "FP8_Block": "quantized_layers/model.layers.10.mlp.down_proj/FP8_Block.pth",
410
+ "NVFP4": "quantized_layers/model.layers.10.mlp.down_proj/NVFP4.pth"
411
+ },
412
+ "model.layers.11.self_attn.q_proj": {
413
+ "FP8_Dynamic": "quantized_layers/model.layers.11.self_attn.q_proj/FP8_Dynamic.pth",
414
+ "FP8_Block": "quantized_layers/model.layers.11.self_attn.q_proj/FP8_Block.pth",
415
+ "NVFP4": "quantized_layers/model.layers.11.self_attn.q_proj/NVFP4.pth"
416
+ },
417
+ "model.layers.11.self_attn.k_proj": {
418
+ "FP8_Dynamic": "quantized_layers/model.layers.11.self_attn.k_proj/FP8_Dynamic.pth",
419
+ "FP8_Block": "quantized_layers/model.layers.11.self_attn.k_proj/FP8_Block.pth",
420
+ "NVFP4": "quantized_layers/model.layers.11.self_attn.k_proj/NVFP4.pth"
421
+ },
422
+ "model.layers.11.self_attn.v_proj": {
423
+ "FP8_Dynamic": "quantized_layers/model.layers.11.self_attn.v_proj/FP8_Dynamic.pth",
424
+ "FP8_Block": "quantized_layers/model.layers.11.self_attn.v_proj/FP8_Block.pth",
425
+ "NVFP4": "quantized_layers/model.layers.11.self_attn.v_proj/NVFP4.pth"
426
+ },
427
+ "model.layers.11.self_attn.o_proj": {
428
+ "FP8_Dynamic": "quantized_layers/model.layers.11.self_attn.o_proj/FP8_Dynamic.pth",
429
+ "FP8_Block": "quantized_layers/model.layers.11.self_attn.o_proj/FP8_Block.pth",
430
+ "NVFP4": "quantized_layers/model.layers.11.self_attn.o_proj/NVFP4.pth"
431
+ },
432
+ "model.layers.11.mlp.gate_proj": {
433
+ "FP8_Dynamic": "quantized_layers/model.layers.11.mlp.gate_proj/FP8_Dynamic.pth",
434
+ "FP8_Block": "quantized_layers/model.layers.11.mlp.gate_proj/FP8_Block.pth",
435
+ "NVFP4": "quantized_layers/model.layers.11.mlp.gate_proj/NVFP4.pth"
436
+ },
437
+ "model.layers.11.mlp.up_proj": {
438
+ "FP8_Dynamic": "quantized_layers/model.layers.11.mlp.up_proj/FP8_Dynamic.pth",
439
+ "FP8_Block": "quantized_layers/model.layers.11.mlp.up_proj/FP8_Block.pth",
440
+ "NVFP4": "quantized_layers/model.layers.11.mlp.up_proj/NVFP4.pth"
441
+ },
442
+ "model.layers.11.mlp.down_proj": {
443
+ "FP8_Dynamic": "quantized_layers/model.layers.11.mlp.down_proj/FP8_Dynamic.pth",
444
+ "FP8_Block": "quantized_layers/model.layers.11.mlp.down_proj/FP8_Block.pth",
445
+ "NVFP4": "quantized_layers/model.layers.11.mlp.down_proj/NVFP4.pth"
446
+ },
447
+ "model.layers.12.self_attn.q_proj": {
448
+ "FP8_Dynamic": "quantized_layers/model.layers.12.self_attn.q_proj/FP8_Dynamic.pth",
449
+ "FP8_Block": "quantized_layers/model.layers.12.self_attn.q_proj/FP8_Block.pth",
450
+ "NVFP4": "quantized_layers/model.layers.12.self_attn.q_proj/NVFP4.pth"
451
+ },
452
+ "model.layers.12.self_attn.k_proj": {
453
+ "FP8_Dynamic": "quantized_layers/model.layers.12.self_attn.k_proj/FP8_Dynamic.pth",
454
+ "FP8_Block": "quantized_layers/model.layers.12.self_attn.k_proj/FP8_Block.pth",
455
+ "NVFP4": "quantized_layers/model.layers.12.self_attn.k_proj/NVFP4.pth"
456
+ },
457
+ "model.layers.12.self_attn.v_proj": {
458
+ "FP8_Dynamic": "quantized_layers/model.layers.12.self_attn.v_proj/FP8_Dynamic.pth",
459
+ "FP8_Block": "quantized_layers/model.layers.12.self_attn.v_proj/FP8_Block.pth",
460
+ "NVFP4": "quantized_layers/model.layers.12.self_attn.v_proj/NVFP4.pth"
461
+ },
462
+ "model.layers.12.self_attn.o_proj": {
463
+ "FP8_Dynamic": "quantized_layers/model.layers.12.self_attn.o_proj/FP8_Dynamic.pth",
464
+ "FP8_Block": "quantized_layers/model.layers.12.self_attn.o_proj/FP8_Block.pth",
465
+ "NVFP4": "quantized_layers/model.layers.12.self_attn.o_proj/NVFP4.pth"
466
+ },
467
+ "model.layers.12.mlp.gate_proj": {
468
+ "FP8_Dynamic": "quantized_layers/model.layers.12.mlp.gate_proj/FP8_Dynamic.pth",
469
+ "FP8_Block": "quantized_layers/model.layers.12.mlp.gate_proj/FP8_Block.pth",
470
+ "NVFP4": "quantized_layers/model.layers.12.mlp.gate_proj/NVFP4.pth"
471
+ },
472
+ "model.layers.12.mlp.up_proj": {
473
+ "FP8_Dynamic": "quantized_layers/model.layers.12.mlp.up_proj/FP8_Dynamic.pth",
474
+ "FP8_Block": "quantized_layers/model.layers.12.mlp.up_proj/FP8_Block.pth",
475
+ "NVFP4": "quantized_layers/model.layers.12.mlp.up_proj/NVFP4.pth"
476
+ },
477
+ "model.layers.12.mlp.down_proj": {
478
+ "FP8_Dynamic": "quantized_layers/model.layers.12.mlp.down_proj/FP8_Dynamic.pth",
479
+ "FP8_Block": "quantized_layers/model.layers.12.mlp.down_proj/FP8_Block.pth",
480
+ "NVFP4": "quantized_layers/model.layers.12.mlp.down_proj/NVFP4.pth"
481
+ },
482
+ "model.layers.13.self_attn.q_proj": {
483
+ "FP8_Dynamic": "quantized_layers/model.layers.13.self_attn.q_proj/FP8_Dynamic.pth",
484
+ "FP8_Block": "quantized_layers/model.layers.13.self_attn.q_proj/FP8_Block.pth",
485
+ "NVFP4": "quantized_layers/model.layers.13.self_attn.q_proj/NVFP4.pth"
486
+ },
487
+ "model.layers.13.self_attn.k_proj": {
488
+ "FP8_Dynamic": "quantized_layers/model.layers.13.self_attn.k_proj/FP8_Dynamic.pth",
489
+ "FP8_Block": "quantized_layers/model.layers.13.self_attn.k_proj/FP8_Block.pth",
490
+ "NVFP4": "quantized_layers/model.layers.13.self_attn.k_proj/NVFP4.pth"
491
+ },
492
+ "model.layers.13.self_attn.v_proj": {
493
+ "FP8_Dynamic": "quantized_layers/model.layers.13.self_attn.v_proj/FP8_Dynamic.pth",
494
+ "FP8_Block": "quantized_layers/model.layers.13.self_attn.v_proj/FP8_Block.pth",
495
+ "NVFP4": "quantized_layers/model.layers.13.self_attn.v_proj/NVFP4.pth"
496
+ },
497
+ "model.layers.13.self_attn.o_proj": {
498
+ "FP8_Dynamic": "quantized_layers/model.layers.13.self_attn.o_proj/FP8_Dynamic.pth",
499
+ "FP8_Block": "quantized_layers/model.layers.13.self_attn.o_proj/FP8_Block.pth",
500
+ "NVFP4": "quantized_layers/model.layers.13.self_attn.o_proj/NVFP4.pth"
501
+ },
502
+ "model.layers.13.mlp.gate_proj": {
503
+ "FP8_Dynamic": "quantized_layers/model.layers.13.mlp.gate_proj/FP8_Dynamic.pth",
504
+ "FP8_Block": "quantized_layers/model.layers.13.mlp.gate_proj/FP8_Block.pth",
505
+ "NVFP4": "quantized_layers/model.layers.13.mlp.gate_proj/NVFP4.pth"
506
+ },
507
+ "model.layers.13.mlp.up_proj": {
508
+ "FP8_Dynamic": "quantized_layers/model.layers.13.mlp.up_proj/FP8_Dynamic.pth",
509
+ "FP8_Block": "quantized_layers/model.layers.13.mlp.up_proj/FP8_Block.pth",
510
+ "NVFP4": "quantized_layers/model.layers.13.mlp.up_proj/NVFP4.pth"
511
+ },
512
+ "model.layers.13.mlp.down_proj": {
513
+ "FP8_Dynamic": "quantized_layers/model.layers.13.mlp.down_proj/FP8_Dynamic.pth",
514
+ "FP8_Block": "quantized_layers/model.layers.13.mlp.down_proj/FP8_Block.pth",
515
+ "NVFP4": "quantized_layers/model.layers.13.mlp.down_proj/NVFP4.pth"
516
+ },
517
+ "model.layers.14.self_attn.q_proj": {
518
+ "FP8_Dynamic": "quantized_layers/model.layers.14.self_attn.q_proj/FP8_Dynamic.pth",
519
+ "FP8_Block": "quantized_layers/model.layers.14.self_attn.q_proj/FP8_Block.pth",
520
+ "NVFP4": "quantized_layers/model.layers.14.self_attn.q_proj/NVFP4.pth"
521
+ },
522
+ "model.layers.14.self_attn.k_proj": {
523
+ "FP8_Dynamic": "quantized_layers/model.layers.14.self_attn.k_proj/FP8_Dynamic.pth",
524
+ "FP8_Block": "quantized_layers/model.layers.14.self_attn.k_proj/FP8_Block.pth",
525
+ "NVFP4": "quantized_layers/model.layers.14.self_attn.k_proj/NVFP4.pth"
526
+ },
527
+ "model.layers.14.self_attn.v_proj": {
528
+ "FP8_Dynamic": "quantized_layers/model.layers.14.self_attn.v_proj/FP8_Dynamic.pth",
529
+ "FP8_Block": "quantized_layers/model.layers.14.self_attn.v_proj/FP8_Block.pth",
530
+ "NVFP4": "quantized_layers/model.layers.14.self_attn.v_proj/NVFP4.pth"
531
+ },
532
+ "model.layers.14.self_attn.o_proj": {
533
+ "FP8_Dynamic": "quantized_layers/model.layers.14.self_attn.o_proj/FP8_Dynamic.pth",
534
+ "FP8_Block": "quantized_layers/model.layers.14.self_attn.o_proj/FP8_Block.pth",
535
+ "NVFP4": "quantized_layers/model.layers.14.self_attn.o_proj/NVFP4.pth"
536
+ },
537
+ "model.layers.14.mlp.gate_proj": {
538
+ "FP8_Dynamic": "quantized_layers/model.layers.14.mlp.gate_proj/FP8_Dynamic.pth",
539
+ "FP8_Block": "quantized_layers/model.layers.14.mlp.gate_proj/FP8_Block.pth",
540
+ "NVFP4": "quantized_layers/model.layers.14.mlp.gate_proj/NVFP4.pth"
541
+ },
542
+ "model.layers.14.mlp.up_proj": {
543
+ "FP8_Dynamic": "quantized_layers/model.layers.14.mlp.up_proj/FP8_Dynamic.pth",
544
+ "FP8_Block": "quantized_layers/model.layers.14.mlp.up_proj/FP8_Block.pth",
545
+ "NVFP4": "quantized_layers/model.layers.14.mlp.up_proj/NVFP4.pth"
546
+ },
547
+ "model.layers.14.mlp.down_proj": {
548
+ "FP8_Dynamic": "quantized_layers/model.layers.14.mlp.down_proj/FP8_Dynamic.pth",
549
+ "FP8_Block": "quantized_layers/model.layers.14.mlp.down_proj/FP8_Block.pth",
550
+ "NVFP4": "quantized_layers/model.layers.14.mlp.down_proj/NVFP4.pth"
551
+ },
552
+ "model.layers.15.self_attn.q_proj": {
553
+ "FP8_Dynamic": "quantized_layers/model.layers.15.self_attn.q_proj/FP8_Dynamic.pth",
554
+ "FP8_Block": "quantized_layers/model.layers.15.self_attn.q_proj/FP8_Block.pth",
555
+ "NVFP4": "quantized_layers/model.layers.15.self_attn.q_proj/NVFP4.pth"
556
+ },
557
+ "model.layers.15.self_attn.k_proj": {
558
+ "FP8_Dynamic": "quantized_layers/model.layers.15.self_attn.k_proj/FP8_Dynamic.pth",
559
+ "FP8_Block": "quantized_layers/model.layers.15.self_attn.k_proj/FP8_Block.pth",
560
+ "NVFP4": "quantized_layers/model.layers.15.self_attn.k_proj/NVFP4.pth"
561
+ },
562
+ "model.layers.15.self_attn.v_proj": {
563
+ "FP8_Dynamic": "quantized_layers/model.layers.15.self_attn.v_proj/FP8_Dynamic.pth",
564
+ "FP8_Block": "quantized_layers/model.layers.15.self_attn.v_proj/FP8_Block.pth",
565
+ "NVFP4": "quantized_layers/model.layers.15.self_attn.v_proj/NVFP4.pth"
566
+ },
567
+ "model.layers.15.self_attn.o_proj": {
568
+ "FP8_Dynamic": "quantized_layers/model.layers.15.self_attn.o_proj/FP8_Dynamic.pth",
569
+ "FP8_Block": "quantized_layers/model.layers.15.self_attn.o_proj/FP8_Block.pth",
570
+ "NVFP4": "quantized_layers/model.layers.15.self_attn.o_proj/NVFP4.pth"
571
+ },
572
+ "model.layers.15.mlp.gate_proj": {
573
+ "FP8_Dynamic": "quantized_layers/model.layers.15.mlp.gate_proj/FP8_Dynamic.pth",
574
+ "FP8_Block": "quantized_layers/model.layers.15.mlp.gate_proj/FP8_Block.pth",
575
+ "NVFP4": "quantized_layers/model.layers.15.mlp.gate_proj/NVFP4.pth"
576
+ },
577
+ "model.layers.15.mlp.up_proj": {
578
+ "FP8_Dynamic": "quantized_layers/model.layers.15.mlp.up_proj/FP8_Dynamic.pth",
579
+ "FP8_Block": "quantized_layers/model.layers.15.mlp.up_proj/FP8_Block.pth",
580
+ "NVFP4": "quantized_layers/model.layers.15.mlp.up_proj/NVFP4.pth"
581
+ },
582
+ "model.layers.15.mlp.down_proj": {
583
+ "FP8_Dynamic": "quantized_layers/model.layers.15.mlp.down_proj/FP8_Dynamic.pth",
584
+ "FP8_Block": "quantized_layers/model.layers.15.mlp.down_proj/FP8_Block.pth",
585
+ "NVFP4": "quantized_layers/model.layers.15.mlp.down_proj/NVFP4.pth"
586
+ },
587
+ "model.layers.16.self_attn.q_proj": {
588
+ "FP8_Dynamic": "quantized_layers/model.layers.16.self_attn.q_proj/FP8_Dynamic.pth",
589
+ "FP8_Block": "quantized_layers/model.layers.16.self_attn.q_proj/FP8_Block.pth",
590
+ "NVFP4": "quantized_layers/model.layers.16.self_attn.q_proj/NVFP4.pth"
591
+ },
592
+ "model.layers.16.self_attn.k_proj": {
593
+ "FP8_Dynamic": "quantized_layers/model.layers.16.self_attn.k_proj/FP8_Dynamic.pth",
594
+ "FP8_Block": "quantized_layers/model.layers.16.self_attn.k_proj/FP8_Block.pth",
595
+ "NVFP4": "quantized_layers/model.layers.16.self_attn.k_proj/NVFP4.pth"
596
+ },
597
+ "model.layers.16.self_attn.v_proj": {
598
+ "FP8_Dynamic": "quantized_layers/model.layers.16.self_attn.v_proj/FP8_Dynamic.pth",
599
+ "FP8_Block": "quantized_layers/model.layers.16.self_attn.v_proj/FP8_Block.pth",
600
+ "NVFP4": "quantized_layers/model.layers.16.self_attn.v_proj/NVFP4.pth"
601
+ },
602
+ "model.layers.16.self_attn.o_proj": {
603
+ "FP8_Dynamic": "quantized_layers/model.layers.16.self_attn.o_proj/FP8_Dynamic.pth",
604
+ "FP8_Block": "quantized_layers/model.layers.16.self_attn.o_proj/FP8_Block.pth",
605
+ "NVFP4": "quantized_layers/model.layers.16.self_attn.o_proj/NVFP4.pth"
606
+ },
607
+ "model.layers.16.mlp.gate_proj": {
608
+ "FP8_Dynamic": "quantized_layers/model.layers.16.mlp.gate_proj/FP8_Dynamic.pth",
609
+ "FP8_Block": "quantized_layers/model.layers.16.mlp.gate_proj/FP8_Block.pth",
610
+ "NVFP4": "quantized_layers/model.layers.16.mlp.gate_proj/NVFP4.pth"
611
+ },
612
+ "model.layers.16.mlp.up_proj": {
613
+ "FP8_Dynamic": "quantized_layers/model.layers.16.mlp.up_proj/FP8_Dynamic.pth",
614
+ "FP8_Block": "quantized_layers/model.layers.16.mlp.up_proj/FP8_Block.pth",
615
+ "NVFP4": "quantized_layers/model.layers.16.mlp.up_proj/NVFP4.pth"
616
+ },
617
+ "model.layers.16.mlp.down_proj": {
618
+ "FP8_Dynamic": "quantized_layers/model.layers.16.mlp.down_proj/FP8_Dynamic.pth",
619
+ "FP8_Block": "quantized_layers/model.layers.16.mlp.down_proj/FP8_Block.pth",
620
+ "NVFP4": "quantized_layers/model.layers.16.mlp.down_proj/NVFP4.pth"
621
+ },
622
+ "model.layers.17.self_attn.q_proj": {
623
+ "FP8_Dynamic": "quantized_layers/model.layers.17.self_attn.q_proj/FP8_Dynamic.pth",
624
+ "FP8_Block": "quantized_layers/model.layers.17.self_attn.q_proj/FP8_Block.pth",
625
+ "NVFP4": "quantized_layers/model.layers.17.self_attn.q_proj/NVFP4.pth"
626
+ },
627
+ "model.layers.17.self_attn.k_proj": {
628
+ "FP8_Dynamic": "quantized_layers/model.layers.17.self_attn.k_proj/FP8_Dynamic.pth",
629
+ "FP8_Block": "quantized_layers/model.layers.17.self_attn.k_proj/FP8_Block.pth",
630
+ "NVFP4": "quantized_layers/model.layers.17.self_attn.k_proj/NVFP4.pth"
631
+ },
632
+ "model.layers.17.self_attn.v_proj": {
633
+ "FP8_Dynamic": "quantized_layers/model.layers.17.self_attn.v_proj/FP8_Dynamic.pth",
634
+ "FP8_Block": "quantized_layers/model.layers.17.self_attn.v_proj/FP8_Block.pth",
635
+ "NVFP4": "quantized_layers/model.layers.17.self_attn.v_proj/NVFP4.pth"
636
+ },
637
+ "model.layers.17.self_attn.o_proj": {
638
+ "FP8_Dynamic": "quantized_layers/model.layers.17.self_attn.o_proj/FP8_Dynamic.pth",
639
+ "FP8_Block": "quantized_layers/model.layers.17.self_attn.o_proj/FP8_Block.pth",
640
+ "NVFP4": "quantized_layers/model.layers.17.self_attn.o_proj/NVFP4.pth"
641
+ },
642
+ "model.layers.17.mlp.gate_proj": {
643
+ "FP8_Dynamic": "quantized_layers/model.layers.17.mlp.gate_proj/FP8_Dynamic.pth",
644
+ "FP8_Block": "quantized_layers/model.layers.17.mlp.gate_proj/FP8_Block.pth",
645
+ "NVFP4": "quantized_layers/model.layers.17.mlp.gate_proj/NVFP4.pth"
646
+ },
647
+ "model.layers.17.mlp.up_proj": {
648
+ "FP8_Dynamic": "quantized_layers/model.layers.17.mlp.up_proj/FP8_Dynamic.pth",
649
+ "FP8_Block": "quantized_layers/model.layers.17.mlp.up_proj/FP8_Block.pth",
650
+ "NVFP4": "quantized_layers/model.layers.17.mlp.up_proj/NVFP4.pth"
651
+ },
652
+ "model.layers.17.mlp.down_proj": {
653
+ "FP8_Dynamic": "quantized_layers/model.layers.17.mlp.down_proj/FP8_Dynamic.pth",
654
+ "FP8_Block": "quantized_layers/model.layers.17.mlp.down_proj/FP8_Block.pth",
655
+ "NVFP4": "quantized_layers/model.layers.17.mlp.down_proj/NVFP4.pth"
656
+ },
657
+ "model.layers.18.self_attn.q_proj": {
658
+ "FP8_Dynamic": "quantized_layers/model.layers.18.self_attn.q_proj/FP8_Dynamic.pth",
659
+ "FP8_Block": "quantized_layers/model.layers.18.self_attn.q_proj/FP8_Block.pth",
660
+ "NVFP4": "quantized_layers/model.layers.18.self_attn.q_proj/NVFP4.pth"
661
+ },
662
+ "model.layers.18.self_attn.k_proj": {
663
+ "FP8_Dynamic": "quantized_layers/model.layers.18.self_attn.k_proj/FP8_Dynamic.pth",
664
+ "FP8_Block": "quantized_layers/model.layers.18.self_attn.k_proj/FP8_Block.pth",
665
+ "NVFP4": "quantized_layers/model.layers.18.self_attn.k_proj/NVFP4.pth"
666
+ },
667
+ "model.layers.18.self_attn.v_proj": {
668
+ "FP8_Dynamic": "quantized_layers/model.layers.18.self_attn.v_proj/FP8_Dynamic.pth",
669
+ "FP8_Block": "quantized_layers/model.layers.18.self_attn.v_proj/FP8_Block.pth",
670
+ "NVFP4": "quantized_layers/model.layers.18.self_attn.v_proj/NVFP4.pth"
671
+ },
672
+ "model.layers.18.self_attn.o_proj": {
673
+ "FP8_Dynamic": "quantized_layers/model.layers.18.self_attn.o_proj/FP8_Dynamic.pth",
674
+ "FP8_Block": "quantized_layers/model.layers.18.self_attn.o_proj/FP8_Block.pth",
675
+ "NVFP4": "quantized_layers/model.layers.18.self_attn.o_proj/NVFP4.pth"
676
+ },
677
+ "model.layers.18.mlp.gate_proj": {
678
+ "FP8_Dynamic": "quantized_layers/model.layers.18.mlp.gate_proj/FP8_Dynamic.pth",
679
+ "FP8_Block": "quantized_layers/model.layers.18.mlp.gate_proj/FP8_Block.pth",
680
+ "NVFP4": "quantized_layers/model.layers.18.mlp.gate_proj/NVFP4.pth"
681
+ },
682
+ "model.layers.18.mlp.up_proj": {
683
+ "FP8_Dynamic": "quantized_layers/model.layers.18.mlp.up_proj/FP8_Dynamic.pth",
684
+ "FP8_Block": "quantized_layers/model.layers.18.mlp.up_proj/FP8_Block.pth",
685
+ "NVFP4": "quantized_layers/model.layers.18.mlp.up_proj/NVFP4.pth"
686
+ },
687
+ "model.layers.18.mlp.down_proj": {
688
+ "FP8_Dynamic": "quantized_layers/model.layers.18.mlp.down_proj/FP8_Dynamic.pth",
689
+ "FP8_Block": "quantized_layers/model.layers.18.mlp.down_proj/FP8_Block.pth",
690
+ "NVFP4": "quantized_layers/model.layers.18.mlp.down_proj/NVFP4.pth"
691
+ },
692
+ "model.layers.19.self_attn.q_proj": {
693
+ "FP8_Dynamic": "quantized_layers/model.layers.19.self_attn.q_proj/FP8_Dynamic.pth",
694
+ "FP8_Block": "quantized_layers/model.layers.19.self_attn.q_proj/FP8_Block.pth",
695
+ "NVFP4": "quantized_layers/model.layers.19.self_attn.q_proj/NVFP4.pth"
696
+ },
697
+ "model.layers.19.self_attn.k_proj": {
698
+ "FP8_Dynamic": "quantized_layers/model.layers.19.self_attn.k_proj/FP8_Dynamic.pth",
699
+ "FP8_Block": "quantized_layers/model.layers.19.self_attn.k_proj/FP8_Block.pth",
700
+ "NVFP4": "quantized_layers/model.layers.19.self_attn.k_proj/NVFP4.pth"
701
+ },
702
+ "model.layers.19.self_attn.v_proj": {
703
+ "FP8_Dynamic": "quantized_layers/model.layers.19.self_attn.v_proj/FP8_Dynamic.pth",
704
+ "FP8_Block": "quantized_layers/model.layers.19.self_attn.v_proj/FP8_Block.pth",
705
+ "NVFP4": "quantized_layers/model.layers.19.self_attn.v_proj/NVFP4.pth"
706
+ },
707
+ "model.layers.19.self_attn.o_proj": {
708
+ "FP8_Dynamic": "quantized_layers/model.layers.19.self_attn.o_proj/FP8_Dynamic.pth",
709
+ "FP8_Block": "quantized_layers/model.layers.19.self_attn.o_proj/FP8_Block.pth",
710
+ "NVFP4": "quantized_layers/model.layers.19.self_attn.o_proj/NVFP4.pth"
711
+ },
712
+ "model.layers.19.mlp.gate_proj": {
713
+ "FP8_Dynamic": "quantized_layers/model.layers.19.mlp.gate_proj/FP8_Dynamic.pth",
714
+ "FP8_Block": "quantized_layers/model.layers.19.mlp.gate_proj/FP8_Block.pth",
715
+ "NVFP4": "quantized_layers/model.layers.19.mlp.gate_proj/NVFP4.pth"
716
+ },
717
+ "model.layers.19.mlp.up_proj": {
718
+ "FP8_Dynamic": "quantized_layers/model.layers.19.mlp.up_proj/FP8_Dynamic.pth",
719
+ "FP8_Block": "quantized_layers/model.layers.19.mlp.up_proj/FP8_Block.pth",
720
+ "NVFP4": "quantized_layers/model.layers.19.mlp.up_proj/NVFP4.pth"
721
+ },
722
+ "model.layers.19.mlp.down_proj": {
723
+ "FP8_Dynamic": "quantized_layers/model.layers.19.mlp.down_proj/FP8_Dynamic.pth",
724
+ "FP8_Block": "quantized_layers/model.layers.19.mlp.down_proj/FP8_Block.pth",
725
+ "NVFP4": "quantized_layers/model.layers.19.mlp.down_proj/NVFP4.pth"
726
+ },
727
+ "model.layers.20.self_attn.q_proj": {
728
+ "FP8_Dynamic": "quantized_layers/model.layers.20.self_attn.q_proj/FP8_Dynamic.pth",
729
+ "FP8_Block": "quantized_layers/model.layers.20.self_attn.q_proj/FP8_Block.pth",
730
+ "NVFP4": "quantized_layers/model.layers.20.self_attn.q_proj/NVFP4.pth"
731
+ },
732
+ "model.layers.20.self_attn.k_proj": {
733
+ "FP8_Dynamic": "quantized_layers/model.layers.20.self_attn.k_proj/FP8_Dynamic.pth",
734
+ "FP8_Block": "quantized_layers/model.layers.20.self_attn.k_proj/FP8_Block.pth",
735
+ "NVFP4": "quantized_layers/model.layers.20.self_attn.k_proj/NVFP4.pth"
736
+ },
737
+ "model.layers.20.self_attn.v_proj": {
738
+ "FP8_Dynamic": "quantized_layers/model.layers.20.self_attn.v_proj/FP8_Dynamic.pth",
739
+ "FP8_Block": "quantized_layers/model.layers.20.self_attn.v_proj/FP8_Block.pth",
740
+ "NVFP4": "quantized_layers/model.layers.20.self_attn.v_proj/NVFP4.pth"
741
+ },
742
+ "model.layers.20.self_attn.o_proj": {
743
+ "FP8_Dynamic": "quantized_layers/model.layers.20.self_attn.o_proj/FP8_Dynamic.pth",
744
+ "FP8_Block": "quantized_layers/model.layers.20.self_attn.o_proj/FP8_Block.pth",
745
+ "NVFP4": "quantized_layers/model.layers.20.self_attn.o_proj/NVFP4.pth"
746
+ },
747
+ "model.layers.20.mlp.gate_proj": {
748
+ "FP8_Dynamic": "quantized_layers/model.layers.20.mlp.gate_proj/FP8_Dynamic.pth",
749
+ "FP8_Block": "quantized_layers/model.layers.20.mlp.gate_proj/FP8_Block.pth",
750
+ "NVFP4": "quantized_layers/model.layers.20.mlp.gate_proj/NVFP4.pth"
751
+ },
752
+ "model.layers.20.mlp.up_proj": {
753
+ "FP8_Dynamic": "quantized_layers/model.layers.20.mlp.up_proj/FP8_Dynamic.pth",
754
+ "FP8_Block": "quantized_layers/model.layers.20.mlp.up_proj/FP8_Block.pth",
755
+ "NVFP4": "quantized_layers/model.layers.20.mlp.up_proj/NVFP4.pth"
756
+ },
757
+ "model.layers.20.mlp.down_proj": {
758
+ "FP8_Dynamic": "quantized_layers/model.layers.20.mlp.down_proj/FP8_Dynamic.pth",
759
+ "FP8_Block": "quantized_layers/model.layers.20.mlp.down_proj/FP8_Block.pth",
760
+ "NVFP4": "quantized_layers/model.layers.20.mlp.down_proj/NVFP4.pth"
761
+ },
762
+ "model.layers.21.self_attn.q_proj": {
763
+ "FP8_Dynamic": "quantized_layers/model.layers.21.self_attn.q_proj/FP8_Dynamic.pth",
764
+ "FP8_Block": "quantized_layers/model.layers.21.self_attn.q_proj/FP8_Block.pth",
765
+ "NVFP4": "quantized_layers/model.layers.21.self_attn.q_proj/NVFP4.pth"
766
+ },
767
+ "model.layers.21.self_attn.k_proj": {
768
+ "FP8_Dynamic": "quantized_layers/model.layers.21.self_attn.k_proj/FP8_Dynamic.pth",
769
+ "FP8_Block": "quantized_layers/model.layers.21.self_attn.k_proj/FP8_Block.pth",
770
+ "NVFP4": "quantized_layers/model.layers.21.self_attn.k_proj/NVFP4.pth"
771
+ },
772
+ "model.layers.21.self_attn.v_proj": {
773
+ "FP8_Dynamic": "quantized_layers/model.layers.21.self_attn.v_proj/FP8_Dynamic.pth",
774
+ "FP8_Block": "quantized_layers/model.layers.21.self_attn.v_proj/FP8_Block.pth",
775
+ "NVFP4": "quantized_layers/model.layers.21.self_attn.v_proj/NVFP4.pth"
776
+ },
777
+ "model.layers.21.self_attn.o_proj": {
778
+ "FP8_Dynamic": "quantized_layers/model.layers.21.self_attn.o_proj/FP8_Dynamic.pth",
779
+ "FP8_Block": "quantized_layers/model.layers.21.self_attn.o_proj/FP8_Block.pth",
780
+ "NVFP4": "quantized_layers/model.layers.21.self_attn.o_proj/NVFP4.pth"
781
+ },
782
+ "model.layers.21.mlp.gate_proj": {
783
+ "FP8_Dynamic": "quantized_layers/model.layers.21.mlp.gate_proj/FP8_Dynamic.pth",
784
+ "FP8_Block": "quantized_layers/model.layers.21.mlp.gate_proj/FP8_Block.pth",
785
+ "NVFP4": "quantized_layers/model.layers.21.mlp.gate_proj/NVFP4.pth"
786
+ },
787
+ "model.layers.21.mlp.up_proj": {
788
+ "FP8_Dynamic": "quantized_layers/model.layers.21.mlp.up_proj/FP8_Dynamic.pth",
789
+ "FP8_Block": "quantized_layers/model.layers.21.mlp.up_proj/FP8_Block.pth",
790
+ "NVFP4": "quantized_layers/model.layers.21.mlp.up_proj/NVFP4.pth"
791
+ },
792
+ "model.layers.21.mlp.down_proj": {
793
+ "FP8_Dynamic": "quantized_layers/model.layers.21.mlp.down_proj/FP8_Dynamic.pth",
794
+ "FP8_Block": "quantized_layers/model.layers.21.mlp.down_proj/FP8_Block.pth",
795
+ "NVFP4": "quantized_layers/model.layers.21.mlp.down_proj/NVFP4.pth"
796
+ },
797
+ "model.layers.22.self_attn.q_proj": {
798
+ "FP8_Dynamic": "quantized_layers/model.layers.22.self_attn.q_proj/FP8_Dynamic.pth",
799
+ "FP8_Block": "quantized_layers/model.layers.22.self_attn.q_proj/FP8_Block.pth",
800
+ "NVFP4": "quantized_layers/model.layers.22.self_attn.q_proj/NVFP4.pth"
801
+ },
802
+ "model.layers.22.self_attn.k_proj": {
803
+ "FP8_Dynamic": "quantized_layers/model.layers.22.self_attn.k_proj/FP8_Dynamic.pth",
804
+ "FP8_Block": "quantized_layers/model.layers.22.self_attn.k_proj/FP8_Block.pth",
805
+ "NVFP4": "quantized_layers/model.layers.22.self_attn.k_proj/NVFP4.pth"
806
+ },
807
+ "model.layers.22.self_attn.v_proj": {
808
+ "FP8_Dynamic": "quantized_layers/model.layers.22.self_attn.v_proj/FP8_Dynamic.pth",
809
+ "FP8_Block": "quantized_layers/model.layers.22.self_attn.v_proj/FP8_Block.pth",
810
+ "NVFP4": "quantized_layers/model.layers.22.self_attn.v_proj/NVFP4.pth"
811
+ },
812
+ "model.layers.22.self_attn.o_proj": {
813
+ "FP8_Dynamic": "quantized_layers/model.layers.22.self_attn.o_proj/FP8_Dynamic.pth",
814
+ "FP8_Block": "quantized_layers/model.layers.22.self_attn.o_proj/FP8_Block.pth",
815
+ "NVFP4": "quantized_layers/model.layers.22.self_attn.o_proj/NVFP4.pth"
816
+ },
817
+ "model.layers.22.mlp.gate_proj": {
818
+ "FP8_Dynamic": "quantized_layers/model.layers.22.mlp.gate_proj/FP8_Dynamic.pth",
819
+ "FP8_Block": "quantized_layers/model.layers.22.mlp.gate_proj/FP8_Block.pth",
820
+ "NVFP4": "quantized_layers/model.layers.22.mlp.gate_proj/NVFP4.pth"
821
+ },
822
+ "model.layers.22.mlp.up_proj": {
823
+ "FP8_Dynamic": "quantized_layers/model.layers.22.mlp.up_proj/FP8_Dynamic.pth",
824
+ "FP8_Block": "quantized_layers/model.layers.22.mlp.up_proj/FP8_Block.pth",
825
+ "NVFP4": "quantized_layers/model.layers.22.mlp.up_proj/NVFP4.pth"
826
+ },
827
+ "model.layers.22.mlp.down_proj": {
828
+ "FP8_Dynamic": "quantized_layers/model.layers.22.mlp.down_proj/FP8_Dynamic.pth",
829
+ "FP8_Block": "quantized_layers/model.layers.22.mlp.down_proj/FP8_Block.pth",
830
+ "NVFP4": "quantized_layers/model.layers.22.mlp.down_proj/NVFP4.pth"
831
+ },
832
+ "model.layers.23.self_attn.q_proj": {
833
+ "FP8_Dynamic": "quantized_layers/model.layers.23.self_attn.q_proj/FP8_Dynamic.pth",
834
+ "FP8_Block": "quantized_layers/model.layers.23.self_attn.q_proj/FP8_Block.pth",
835
+ "NVFP4": "quantized_layers/model.layers.23.self_attn.q_proj/NVFP4.pth"
836
+ },
837
+ "model.layers.23.self_attn.k_proj": {
838
+ "FP8_Dynamic": "quantized_layers/model.layers.23.self_attn.k_proj/FP8_Dynamic.pth",
839
+ "FP8_Block": "quantized_layers/model.layers.23.self_attn.k_proj/FP8_Block.pth",
840
+ "NVFP4": "quantized_layers/model.layers.23.self_attn.k_proj/NVFP4.pth"
841
+ },
842
+ "model.layers.23.self_attn.v_proj": {
843
+ "FP8_Dynamic": "quantized_layers/model.layers.23.self_attn.v_proj/FP8_Dynamic.pth",
844
+ "FP8_Block": "quantized_layers/model.layers.23.self_attn.v_proj/FP8_Block.pth",
845
+ "NVFP4": "quantized_layers/model.layers.23.self_attn.v_proj/NVFP4.pth"
846
+ },
847
+ "model.layers.23.self_attn.o_proj": {
848
+ "FP8_Dynamic": "quantized_layers/model.layers.23.self_attn.o_proj/FP8_Dynamic.pth",
849
+ "FP8_Block": "quantized_layers/model.layers.23.self_attn.o_proj/FP8_Block.pth",
850
+ "NVFP4": "quantized_layers/model.layers.23.self_attn.o_proj/NVFP4.pth"
851
+ },
852
+ "model.layers.23.mlp.gate_proj": {
853
+ "FP8_Dynamic": "quantized_layers/model.layers.23.mlp.gate_proj/FP8_Dynamic.pth",
854
+ "FP8_Block": "quantized_layers/model.layers.23.mlp.gate_proj/FP8_Block.pth",
855
+ "NVFP4": "quantized_layers/model.layers.23.mlp.gate_proj/NVFP4.pth"
856
+ },
857
+ "model.layers.23.mlp.up_proj": {
858
+ "FP8_Dynamic": "quantized_layers/model.layers.23.mlp.up_proj/FP8_Dynamic.pth",
859
+ "FP8_Block": "quantized_layers/model.layers.23.mlp.up_proj/FP8_Block.pth",
860
+ "NVFP4": "quantized_layers/model.layers.23.mlp.up_proj/NVFP4.pth"
861
+ },
862
+ "model.layers.23.mlp.down_proj": {
863
+ "FP8_Dynamic": "quantized_layers/model.layers.23.mlp.down_proj/FP8_Dynamic.pth",
864
+ "FP8_Block": "quantized_layers/model.layers.23.mlp.down_proj/FP8_Block.pth",
865
+ "NVFP4": "quantized_layers/model.layers.23.mlp.down_proj/NVFP4.pth"
866
+ },
867
+ "model.layers.24.self_attn.q_proj": {
868
+ "FP8_Dynamic": "quantized_layers/model.layers.24.self_attn.q_proj/FP8_Dynamic.pth",
869
+ "FP8_Block": "quantized_layers/model.layers.24.self_attn.q_proj/FP8_Block.pth",
870
+ "NVFP4": "quantized_layers/model.layers.24.self_attn.q_proj/NVFP4.pth"
871
+ },
872
+ "model.layers.24.self_attn.k_proj": {
873
+ "FP8_Dynamic": "quantized_layers/model.layers.24.self_attn.k_proj/FP8_Dynamic.pth",
874
+ "FP8_Block": "quantized_layers/model.layers.24.self_attn.k_proj/FP8_Block.pth",
875
+ "NVFP4": "quantized_layers/model.layers.24.self_attn.k_proj/NVFP4.pth"
876
+ },
877
+ "model.layers.24.self_attn.v_proj": {
878
+ "FP8_Dynamic": "quantized_layers/model.layers.24.self_attn.v_proj/FP8_Dynamic.pth",
879
+ "FP8_Block": "quantized_layers/model.layers.24.self_attn.v_proj/FP8_Block.pth",
880
+ "NVFP4": "quantized_layers/model.layers.24.self_attn.v_proj/NVFP4.pth"
881
+ },
882
+ "model.layers.24.self_attn.o_proj": {
883
+ "FP8_Dynamic": "quantized_layers/model.layers.24.self_attn.o_proj/FP8_Dynamic.pth",
884
+ "FP8_Block": "quantized_layers/model.layers.24.self_attn.o_proj/FP8_Block.pth",
885
+ "NVFP4": "quantized_layers/model.layers.24.self_attn.o_proj/NVFP4.pth"
886
+ },
887
+ "model.layers.24.mlp.gate_proj": {
888
+ "FP8_Dynamic": "quantized_layers/model.layers.24.mlp.gate_proj/FP8_Dynamic.pth",
889
+ "FP8_Block": "quantized_layers/model.layers.24.mlp.gate_proj/FP8_Block.pth",
890
+ "NVFP4": "quantized_layers/model.layers.24.mlp.gate_proj/NVFP4.pth"
891
+ },
892
+ "model.layers.24.mlp.up_proj": {
893
+ "FP8_Dynamic": "quantized_layers/model.layers.24.mlp.up_proj/FP8_Dynamic.pth",
894
+ "FP8_Block": "quantized_layers/model.layers.24.mlp.up_proj/FP8_Block.pth",
895
+ "NVFP4": "quantized_layers/model.layers.24.mlp.up_proj/NVFP4.pth"
896
+ },
897
+ "model.layers.24.mlp.down_proj": {
898
+ "FP8_Dynamic": "quantized_layers/model.layers.24.mlp.down_proj/FP8_Dynamic.pth",
899
+ "FP8_Block": "quantized_layers/model.layers.24.mlp.down_proj/FP8_Block.pth",
900
+ "NVFP4": "quantized_layers/model.layers.24.mlp.down_proj/NVFP4.pth"
901
+ },
902
+ "model.layers.25.self_attn.q_proj": {
903
+ "FP8_Dynamic": "quantized_layers/model.layers.25.self_attn.q_proj/FP8_Dynamic.pth",
904
+ "FP8_Block": "quantized_layers/model.layers.25.self_attn.q_proj/FP8_Block.pth",
905
+ "NVFP4": "quantized_layers/model.layers.25.self_attn.q_proj/NVFP4.pth"
906
+ },
907
+ "model.layers.25.self_attn.k_proj": {
908
+ "FP8_Dynamic": "quantized_layers/model.layers.25.self_attn.k_proj/FP8_Dynamic.pth",
909
+ "FP8_Block": "quantized_layers/model.layers.25.self_attn.k_proj/FP8_Block.pth",
910
+ "NVFP4": "quantized_layers/model.layers.25.self_attn.k_proj/NVFP4.pth"
911
+ },
912
+ "model.layers.25.self_attn.v_proj": {
913
+ "FP8_Dynamic": "quantized_layers/model.layers.25.self_attn.v_proj/FP8_Dynamic.pth",
914
+ "FP8_Block": "quantized_layers/model.layers.25.self_attn.v_proj/FP8_Block.pth",
915
+ "NVFP4": "quantized_layers/model.layers.25.self_attn.v_proj/NVFP4.pth"
916
+ },
917
+ "model.layers.25.self_attn.o_proj": {
918
+ "FP8_Dynamic": "quantized_layers/model.layers.25.self_attn.o_proj/FP8_Dynamic.pth",
919
+ "FP8_Block": "quantized_layers/model.layers.25.self_attn.o_proj/FP8_Block.pth",
920
+ "NVFP4": "quantized_layers/model.layers.25.self_attn.o_proj/NVFP4.pth"
921
+ },
922
+ "model.layers.25.mlp.gate_proj": {
923
+ "FP8_Dynamic": "quantized_layers/model.layers.25.mlp.gate_proj/FP8_Dynamic.pth",
924
+ "FP8_Block": "quantized_layers/model.layers.25.mlp.gate_proj/FP8_Block.pth",
925
+ "NVFP4": "quantized_layers/model.layers.25.mlp.gate_proj/NVFP4.pth"
926
+ },
927
+ "model.layers.25.mlp.up_proj": {
928
+ "FP8_Dynamic": "quantized_layers/model.layers.25.mlp.up_proj/FP8_Dynamic.pth",
929
+ "FP8_Block": "quantized_layers/model.layers.25.mlp.up_proj/FP8_Block.pth",
930
+ "NVFP4": "quantized_layers/model.layers.25.mlp.up_proj/NVFP4.pth"
931
+ },
932
+ "model.layers.25.mlp.down_proj": {
933
+ "FP8_Dynamic": "quantized_layers/model.layers.25.mlp.down_proj/FP8_Dynamic.pth",
934
+ "FP8_Block": "quantized_layers/model.layers.25.mlp.down_proj/FP8_Block.pth",
935
+ "NVFP4": "quantized_layers/model.layers.25.mlp.down_proj/NVFP4.pth"
936
+ },
937
+ "model.layers.26.self_attn.q_proj": {
938
+ "FP8_Dynamic": "quantized_layers/model.layers.26.self_attn.q_proj/FP8_Dynamic.pth",
939
+ "FP8_Block": "quantized_layers/model.layers.26.self_attn.q_proj/FP8_Block.pth",
940
+ "NVFP4": "quantized_layers/model.layers.26.self_attn.q_proj/NVFP4.pth"
941
+ },
942
+ "model.layers.26.self_attn.k_proj": {
943
+ "FP8_Dynamic": "quantized_layers/model.layers.26.self_attn.k_proj/FP8_Dynamic.pth",
944
+ "FP8_Block": "quantized_layers/model.layers.26.self_attn.k_proj/FP8_Block.pth",
945
+ "NVFP4": "quantized_layers/model.layers.26.self_attn.k_proj/NVFP4.pth"
946
+ },
947
+ "model.layers.26.self_attn.v_proj": {
948
+ "FP8_Dynamic": "quantized_layers/model.layers.26.self_attn.v_proj/FP8_Dynamic.pth",
949
+ "FP8_Block": "quantized_layers/model.layers.26.self_attn.v_proj/FP8_Block.pth",
950
+ "NVFP4": "quantized_layers/model.layers.26.self_attn.v_proj/NVFP4.pth"
951
+ },
952
+ "model.layers.26.self_attn.o_proj": {
953
+ "FP8_Dynamic": "quantized_layers/model.layers.26.self_attn.o_proj/FP8_Dynamic.pth",
954
+ "FP8_Block": "quantized_layers/model.layers.26.self_attn.o_proj/FP8_Block.pth",
955
+ "NVFP4": "quantized_layers/model.layers.26.self_attn.o_proj/NVFP4.pth"
956
+ },
957
+ "model.layers.26.mlp.gate_proj": {
958
+ "FP8_Dynamic": "quantized_layers/model.layers.26.mlp.gate_proj/FP8_Dynamic.pth",
959
+ "FP8_Block": "quantized_layers/model.layers.26.mlp.gate_proj/FP8_Block.pth",
960
+ "NVFP4": "quantized_layers/model.layers.26.mlp.gate_proj/NVFP4.pth"
961
+ },
962
+ "model.layers.26.mlp.up_proj": {
963
+ "FP8_Dynamic": "quantized_layers/model.layers.26.mlp.up_proj/FP8_Dynamic.pth",
964
+ "FP8_Block": "quantized_layers/model.layers.26.mlp.up_proj/FP8_Block.pth",
965
+ "NVFP4": "quantized_layers/model.layers.26.mlp.up_proj/NVFP4.pth"
966
+ },
967
+ "model.layers.26.mlp.down_proj": {
968
+ "FP8_Dynamic": "quantized_layers/model.layers.26.mlp.down_proj/FP8_Dynamic.pth",
969
+ "FP8_Block": "quantized_layers/model.layers.26.mlp.down_proj/FP8_Block.pth",
970
+ "NVFP4": "quantized_layers/model.layers.26.mlp.down_proj/NVFP4.pth"
971
+ },
972
+ "model.layers.27.self_attn.q_proj": {
973
+ "FP8_Dynamic": "quantized_layers/model.layers.27.self_attn.q_proj/FP8_Dynamic.pth",
974
+ "FP8_Block": "quantized_layers/model.layers.27.self_attn.q_proj/FP8_Block.pth",
975
+ "NVFP4": "quantized_layers/model.layers.27.self_attn.q_proj/NVFP4.pth"
976
+ },
977
+ "model.layers.27.self_attn.k_proj": {
978
+ "FP8_Dynamic": "quantized_layers/model.layers.27.self_attn.k_proj/FP8_Dynamic.pth",
979
+ "FP8_Block": "quantized_layers/model.layers.27.self_attn.k_proj/FP8_Block.pth",
980
+ "NVFP4": "quantized_layers/model.layers.27.self_attn.k_proj/NVFP4.pth"
981
+ },
982
+ "model.layers.27.self_attn.v_proj": {
983
+ "FP8_Dynamic": "quantized_layers/model.layers.27.self_attn.v_proj/FP8_Dynamic.pth",
984
+ "FP8_Block": "quantized_layers/model.layers.27.self_attn.v_proj/FP8_Block.pth",
985
+ "NVFP4": "quantized_layers/model.layers.27.self_attn.v_proj/NVFP4.pth"
986
+ },
987
+ "model.layers.27.self_attn.o_proj": {
988
+ "FP8_Dynamic": "quantized_layers/model.layers.27.self_attn.o_proj/FP8_Dynamic.pth",
989
+ "FP8_Block": "quantized_layers/model.layers.27.self_attn.o_proj/FP8_Block.pth",
990
+ "NVFP4": "quantized_layers/model.layers.27.self_attn.o_proj/NVFP4.pth"
991
+ },
992
+ "model.layers.27.mlp.gate_proj": {
993
+ "FP8_Dynamic": "quantized_layers/model.layers.27.mlp.gate_proj/FP8_Dynamic.pth",
994
+ "FP8_Block": "quantized_layers/model.layers.27.mlp.gate_proj/FP8_Block.pth",
995
+ "NVFP4": "quantized_layers/model.layers.27.mlp.gate_proj/NVFP4.pth"
996
+ },
997
+ "model.layers.27.mlp.up_proj": {
998
+ "FP8_Dynamic": "quantized_layers/model.layers.27.mlp.up_proj/FP8_Dynamic.pth",
999
+ "FP8_Block": "quantized_layers/model.layers.27.mlp.up_proj/FP8_Block.pth",
1000
+ "NVFP4": "quantized_layers/model.layers.27.mlp.up_proj/NVFP4.pth"
1001
+ },
1002
+ "model.layers.27.mlp.down_proj": {
1003
+ "FP8_Dynamic": "quantized_layers/model.layers.27.mlp.down_proj/FP8_Dynamic.pth",
1004
+ "FP8_Block": "quantized_layers/model.layers.27.mlp.down_proj/FP8_Block.pth",
1005
+ "NVFP4": "quantized_layers/model.layers.27.mlp.down_proj/NVFP4.pth"
1006
+ },
1007
+ "model.layers.28.self_attn.q_proj": {
1008
+ "FP8_Dynamic": "quantized_layers/model.layers.28.self_attn.q_proj/FP8_Dynamic.pth",
1009
+ "FP8_Block": "quantized_layers/model.layers.28.self_attn.q_proj/FP8_Block.pth",
1010
+ "NVFP4": "quantized_layers/model.layers.28.self_attn.q_proj/NVFP4.pth"
1011
+ },
1012
+ "model.layers.28.self_attn.k_proj": {
1013
+ "FP8_Dynamic": "quantized_layers/model.layers.28.self_attn.k_proj/FP8_Dynamic.pth",
1014
+ "FP8_Block": "quantized_layers/model.layers.28.self_attn.k_proj/FP8_Block.pth",
1015
+ "NVFP4": "quantized_layers/model.layers.28.self_attn.k_proj/NVFP4.pth"
1016
+ },
1017
+ "model.layers.28.self_attn.v_proj": {
1018
+ "FP8_Dynamic": "quantized_layers/model.layers.28.self_attn.v_proj/FP8_Dynamic.pth",
1019
+ "FP8_Block": "quantized_layers/model.layers.28.self_attn.v_proj/FP8_Block.pth",
1020
+ "NVFP4": "quantized_layers/model.layers.28.self_attn.v_proj/NVFP4.pth"
1021
+ },
1022
+ "model.layers.28.self_attn.o_proj": {
1023
+ "FP8_Dynamic": "quantized_layers/model.layers.28.self_attn.o_proj/FP8_Dynamic.pth",
1024
+ "FP8_Block": "quantized_layers/model.layers.28.self_attn.o_proj/FP8_Block.pth",
1025
+ "NVFP4": "quantized_layers/model.layers.28.self_attn.o_proj/NVFP4.pth"
1026
+ },
1027
+ "model.layers.28.mlp.gate_proj": {
1028
+ "FP8_Dynamic": "quantized_layers/model.layers.28.mlp.gate_proj/FP8_Dynamic.pth",
1029
+ "FP8_Block": "quantized_layers/model.layers.28.mlp.gate_proj/FP8_Block.pth",
1030
+ "NVFP4": "quantized_layers/model.layers.28.mlp.gate_proj/NVFP4.pth"
1031
+ },
1032
+ "model.layers.28.mlp.up_proj": {
1033
+ "FP8_Dynamic": "quantized_layers/model.layers.28.mlp.up_proj/FP8_Dynamic.pth",
1034
+ "FP8_Block": "quantized_layers/model.layers.28.mlp.up_proj/FP8_Block.pth",
1035
+ "NVFP4": "quantized_layers/model.layers.28.mlp.up_proj/NVFP4.pth"
1036
+ },
1037
+ "model.layers.28.mlp.down_proj": {
1038
+ "FP8_Dynamic": "quantized_layers/model.layers.28.mlp.down_proj/FP8_Dynamic.pth",
1039
+ "FP8_Block": "quantized_layers/model.layers.28.mlp.down_proj/FP8_Block.pth",
1040
+ "NVFP4": "quantized_layers/model.layers.28.mlp.down_proj/NVFP4.pth"
1041
+ },
1042
+ "model.layers.29.self_attn.q_proj": {
1043
+ "FP8_Dynamic": "quantized_layers/model.layers.29.self_attn.q_proj/FP8_Dynamic.pth",
1044
+ "FP8_Block": "quantized_layers/model.layers.29.self_attn.q_proj/FP8_Block.pth",
1045
+ "NVFP4": "quantized_layers/model.layers.29.self_attn.q_proj/NVFP4.pth"
1046
+ },
1047
+ "model.layers.29.self_attn.k_proj": {
1048
+ "FP8_Dynamic": "quantized_layers/model.layers.29.self_attn.k_proj/FP8_Dynamic.pth",
1049
+ "FP8_Block": "quantized_layers/model.layers.29.self_attn.k_proj/FP8_Block.pth",
1050
+ "NVFP4": "quantized_layers/model.layers.29.self_attn.k_proj/NVFP4.pth"
1051
+ },
1052
+ "model.layers.29.self_attn.v_proj": {
1053
+ "FP8_Dynamic": "quantized_layers/model.layers.29.self_attn.v_proj/FP8_Dynamic.pth",
1054
+ "FP8_Block": "quantized_layers/model.layers.29.self_attn.v_proj/FP8_Block.pth",
1055
+ "NVFP4": "quantized_layers/model.layers.29.self_attn.v_proj/NVFP4.pth"
1056
+ },
1057
+ "model.layers.29.self_attn.o_proj": {
1058
+ "FP8_Dynamic": "quantized_layers/model.layers.29.self_attn.o_proj/FP8_Dynamic.pth",
1059
+ "FP8_Block": "quantized_layers/model.layers.29.self_attn.o_proj/FP8_Block.pth",
1060
+ "NVFP4": "quantized_layers/model.layers.29.self_attn.o_proj/NVFP4.pth"
1061
+ },
1062
+ "model.layers.29.mlp.gate_proj": {
1063
+ "FP8_Dynamic": "quantized_layers/model.layers.29.mlp.gate_proj/FP8_Dynamic.pth",
1064
+ "FP8_Block": "quantized_layers/model.layers.29.mlp.gate_proj/FP8_Block.pth",
1065
+ "NVFP4": "quantized_layers/model.layers.29.mlp.gate_proj/NVFP4.pth"
1066
+ },
1067
+ "model.layers.29.mlp.up_proj": {
1068
+ "FP8_Dynamic": "quantized_layers/model.layers.29.mlp.up_proj/FP8_Dynamic.pth",
1069
+ "FP8_Block": "quantized_layers/model.layers.29.mlp.up_proj/FP8_Block.pth",
1070
+ "NVFP4": "quantized_layers/model.layers.29.mlp.up_proj/NVFP4.pth"
1071
+ },
1072
+ "model.layers.29.mlp.down_proj": {
1073
+ "FP8_Dynamic": "quantized_layers/model.layers.29.mlp.down_proj/FP8_Dynamic.pth",
1074
+ "FP8_Block": "quantized_layers/model.layers.29.mlp.down_proj/FP8_Block.pth",
1075
+ "NVFP4": "quantized_layers/model.layers.29.mlp.down_proj/NVFP4.pth"
1076
+ },
1077
+ "model.layers.30.self_attn.q_proj": {
1078
+ "FP8_Dynamic": "quantized_layers/model.layers.30.self_attn.q_proj/FP8_Dynamic.pth",
1079
+ "FP8_Block": "quantized_layers/model.layers.30.self_attn.q_proj/FP8_Block.pth",
1080
+ "NVFP4": "quantized_layers/model.layers.30.self_attn.q_proj/NVFP4.pth"
1081
+ },
1082
+ "model.layers.30.self_attn.k_proj": {
1083
+ "FP8_Dynamic": "quantized_layers/model.layers.30.self_attn.k_proj/FP8_Dynamic.pth",
1084
+ "FP8_Block": "quantized_layers/model.layers.30.self_attn.k_proj/FP8_Block.pth",
1085
+ "NVFP4": "quantized_layers/model.layers.30.self_attn.k_proj/NVFP4.pth"
1086
+ },
1087
+ "model.layers.30.self_attn.v_proj": {
1088
+ "FP8_Dynamic": "quantized_layers/model.layers.30.self_attn.v_proj/FP8_Dynamic.pth",
1089
+ "FP8_Block": "quantized_layers/model.layers.30.self_attn.v_proj/FP8_Block.pth",
1090
+ "NVFP4": "quantized_layers/model.layers.30.self_attn.v_proj/NVFP4.pth"
1091
+ },
1092
+ "model.layers.30.self_attn.o_proj": {
1093
+ "FP8_Dynamic": "quantized_layers/model.layers.30.self_attn.o_proj/FP8_Dynamic.pth",
1094
+ "FP8_Block": "quantized_layers/model.layers.30.self_attn.o_proj/FP8_Block.pth",
1095
+ "NVFP4": "quantized_layers/model.layers.30.self_attn.o_proj/NVFP4.pth"
1096
+ },
1097
+ "model.layers.30.mlp.gate_proj": {
1098
+ "FP8_Dynamic": "quantized_layers/model.layers.30.mlp.gate_proj/FP8_Dynamic.pth",
1099
+ "FP8_Block": "quantized_layers/model.layers.30.mlp.gate_proj/FP8_Block.pth",
1100
+ "NVFP4": "quantized_layers/model.layers.30.mlp.gate_proj/NVFP4.pth"
1101
+ },
1102
+ "model.layers.30.mlp.up_proj": {
1103
+ "FP8_Dynamic": "quantized_layers/model.layers.30.mlp.up_proj/FP8_Dynamic.pth",
1104
+ "FP8_Block": "quantized_layers/model.layers.30.mlp.up_proj/FP8_Block.pth",
1105
+ "NVFP4": "quantized_layers/model.layers.30.mlp.up_proj/NVFP4.pth"
1106
+ },
1107
+ "model.layers.30.mlp.down_proj": {
1108
+ "FP8_Dynamic": "quantized_layers/model.layers.30.mlp.down_proj/FP8_Dynamic.pth",
1109
+ "FP8_Block": "quantized_layers/model.layers.30.mlp.down_proj/FP8_Block.pth",
1110
+ "NVFP4": "quantized_layers/model.layers.30.mlp.down_proj/NVFP4.pth"
1111
+ },
1112
+ "model.layers.31.self_attn.q_proj": {
1113
+ "FP8_Dynamic": "quantized_layers/model.layers.31.self_attn.q_proj/FP8_Dynamic.pth",
1114
+ "FP8_Block": "quantized_layers/model.layers.31.self_attn.q_proj/FP8_Block.pth",
1115
+ "NVFP4": "quantized_layers/model.layers.31.self_attn.q_proj/NVFP4.pth"
1116
+ },
1117
+ "model.layers.31.self_attn.k_proj": {
1118
+ "FP8_Dynamic": "quantized_layers/model.layers.31.self_attn.k_proj/FP8_Dynamic.pth",
1119
+ "FP8_Block": "quantized_layers/model.layers.31.self_attn.k_proj/FP8_Block.pth",
1120
+ "NVFP4": "quantized_layers/model.layers.31.self_attn.k_proj/NVFP4.pth"
1121
+ },
1122
+ "model.layers.31.self_attn.v_proj": {
1123
+ "FP8_Dynamic": "quantized_layers/model.layers.31.self_attn.v_proj/FP8_Dynamic.pth",
1124
+ "FP8_Block": "quantized_layers/model.layers.31.self_attn.v_proj/FP8_Block.pth",
1125
+ "NVFP4": "quantized_layers/model.layers.31.self_attn.v_proj/NVFP4.pth"
1126
+ },
1127
+ "model.layers.31.self_attn.o_proj": {
1128
+ "FP8_Dynamic": "quantized_layers/model.layers.31.self_attn.o_proj/FP8_Dynamic.pth",
1129
+ "FP8_Block": "quantized_layers/model.layers.31.self_attn.o_proj/FP8_Block.pth",
1130
+ "NVFP4": "quantized_layers/model.layers.31.self_attn.o_proj/NVFP4.pth"
1131
+ },
1132
+ "model.layers.31.mlp.gate_proj": {
1133
+ "FP8_Dynamic": "quantized_layers/model.layers.31.mlp.gate_proj/FP8_Dynamic.pth",
1134
+ "FP8_Block": "quantized_layers/model.layers.31.mlp.gate_proj/FP8_Block.pth",
1135
+ "NVFP4": "quantized_layers/model.layers.31.mlp.gate_proj/NVFP4.pth"
1136
+ },
1137
+ "model.layers.31.mlp.up_proj": {
1138
+ "FP8_Dynamic": "quantized_layers/model.layers.31.mlp.up_proj/FP8_Dynamic.pth",
1139
+ "FP8_Block": "quantized_layers/model.layers.31.mlp.up_proj/FP8_Block.pth",
1140
+ "NVFP4": "quantized_layers/model.layers.31.mlp.up_proj/NVFP4.pth"
1141
+ },
1142
+ "model.layers.31.mlp.down_proj": {
1143
+ "FP8_Dynamic": "quantized_layers/model.layers.31.mlp.down_proj/FP8_Dynamic.pth",
1144
+ "FP8_Block": "quantized_layers/model.layers.31.mlp.down_proj/FP8_Block.pth",
1145
+ "NVFP4": "quantized_layers/model.layers.31.mlp.down_proj/NVFP4.pth"
1146
+ }
1147
+ }
1148
+ }
model.layers.0.mlp.down_proj/FP8_Block.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39a5b2065a072a24494f7230e390a4e775adb84cb6655cba6eddd1a5486d11b5
3
+ size 117442359
model.layers.0.mlp.down_proj/FP8_Dynamic.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58405ec430978bbe2082b7569c8377c06af37f9ebf4fba8800202271348a8613
3
+ size 117442373
model.layers.0.mlp.down_proj/NVFP4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f60f0b99c635540e979975831899c5ea3119fbf3ba4f37c6a45664952ac85f58
3
+ size 117442267
model.layers.0.mlp.gate_proj/FP8_Block.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d95f99af91d3bc222e8c45cdcf037e5b65f5c023aeb2bdb20f2b1fddbb3c8ef
3
+ size 117442359
model.layers.0.mlp.gate_proj/FP8_Dynamic.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f94b79a766c65795496d41e927ec68c67baf4e1f4ece95faa464e26bbc65eae
3
+ size 117442373
model.layers.0.mlp.gate_proj/NVFP4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a429c738073d528249ca9eab09e1fb41841e2b61f0cc4de39e75e5a9e9f40de
3
+ size 117442267
model.layers.0.mlp.up_proj/FP8_Block.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:974c4b0c5dd003fe125111164643d5a1fdd254a19d915157dea2b1cecb2d6edf
3
+ size 117442359
model.layers.0.mlp.up_proj/FP8_Dynamic.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2085117fc6b0db4d3f29d49cbc56139e315d4aa0a4f662094cf32a67046eefb4
3
+ size 117442373
model.layers.0.mlp.up_proj/NVFP4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e057693b6bfce3c6726e98a686932b13737118696dc56174c14c7e2246aaad7b
3
+ size 117442267
model.layers.0.self_attn.k_proj/FP8_Block.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2b5faf6367c3703c1593c8785ad143270d80914768436fe1424318ff55687ac
3
+ size 8390455
model.layers.0.self_attn.k_proj/FP8_Dynamic.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97bcdf10f857baf6ff5529cf2d1e5a27bbc678d97997b2900320c5de74742774
3
+ size 8390469
model.layers.0.self_attn.k_proj/NVFP4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f65698d124b7c1d181dca6162711c843ba4f5d490585c20d62c83b516b66794
3
+ size 8390363
model.layers.0.self_attn.o_proj/FP8_Block.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:123f6080593d2441f5c0910bb97ce24cfa01404d46041fe6c3aeee75c5a8dbc4
3
+ size 33556279
model.layers.0.self_attn.o_proj/FP8_Dynamic.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f04130a870297081a00731315c3d64410edfed3c8fb797ae48ed8a07fe53e5c9
3
+ size 33556293
model.layers.0.self_attn.o_proj/NVFP4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:060385094a6f336853a0aa25e26bed26bd1005b3690c02d13677719bcfcd3bae
3
+ size 33556187
model.layers.0.self_attn.q_proj/FP8_Block.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c4d2e42ef16c881782ac43d174d99416d6b20265ecb3101cbfa12ccacf23b40
3
+ size 33556279
model.layers.0.self_attn.q_proj/FP8_Dynamic.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d2b6b7c0cb0d3c16a86287bc68154fc27e0e318d60173223bef9c4ef03ead93
3
+ size 33556293
model.layers.0.self_attn.q_proj/NVFP4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74ac207a12365392d5b70ae762738c96cca92fe4c766975986d11115d0054914
3
+ size 33556187
model.layers.0.self_attn.v_proj/FP8_Block.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa98789413d14912736ab2c1cc8d0400bc7d2d7f2341bb2fd2221a5500a9bd35
3
+ size 8390455
model.layers.0.self_attn.v_proj/FP8_Dynamic.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d24a54c4e7d657f679d85fc4cbacf5fd0b47ccb316d1cc52b0787efba278b6e0
3
+ size 8390469
model.layers.0.self_attn.v_proj/NVFP4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d04a06a5605a533a12ee5f2096efe79b258af286e3da28bf6326a6ada797f5bb
3
+ size 8390363
model.layers.1.mlp.down_proj/FP8_Block.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97f4c3356c72d615bf69ae80261279c2d844a38c357fb42fd91ecfe2f0efc076
3
+ size 117442359
model.layers.1.mlp.down_proj/FP8_Dynamic.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d1dfbb1d48975bb896b031956c3862ac6dc00006bd91e1793f5c4aa1464d250
3
+ size 117442373
model.layers.1.mlp.down_proj/NVFP4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76269fe3798f0aded6158eafbc67deff240ae696519e0de3292ca9916048523a
3
+ size 117442267
model.layers.1.mlp.gate_proj/FP8_Block.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c124aa5b52a855ee5c76ec910df3124dfccbccb14a7ec39a11cc2d07a36bfc5
3
+ size 117442359
model.layers.1.mlp.gate_proj/FP8_Dynamic.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5ac3b7ca16de58b3ac5e8ef9712fd52687ab7d426a18266019f353a497ac1d3
3
+ size 117442373
model.layers.1.mlp.gate_proj/NVFP4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:391f675bf9012bcf4af252901e44afb426e599bb9793285312fff518b9409322
3
+ size 117442267
model.layers.1.mlp.up_proj/FP8_Block.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b6e1ac61264e534ed42dfeaba6ecc9c0a7479b5a12a14cf203cd6ebeaff7edb
3
+ size 117442359
model.layers.1.mlp.up_proj/FP8_Dynamic.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2be56c2a9c86205c1decb940b79c025e355875327df3ca5dbf43ec94ee441a7
3
+ size 117442373
model.layers.1.mlp.up_proj/NVFP4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b07c43e6b9f63a9b047ecd107148952191bcf2191af7538c6cb4ee424fb6f49
3
+ size 117442267
model.layers.1.self_attn.k_proj/FP8_Block.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c204a8373646eeb212ed55867290449328a2e609f8298a7a9324290e09bab690
3
+ size 8390455
model.layers.1.self_attn.k_proj/FP8_Dynamic.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c512b0b0acca91bd0f10f5dfc28678d8e728aaa7d768a855917d17a1d0df84f
3
+ size 8390469
model.layers.1.self_attn.k_proj/NVFP4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0abbfc750d622b61806ed375c1da3138c09f86ef1e38af6b8ccf3c2e352ce903
3
+ size 8390363
model.layers.1.self_attn.o_proj/FP8_Block.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a22bcaed6353dbc68bb1d860de5215833f5f6fb4e58561c4b4dc91981afc046
3
+ size 33556279
model.layers.1.self_attn.o_proj/FP8_Dynamic.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f78a75238732ec84ed7cc6d35f3712b86a24559c85cf4d5dc539d2f5cb005be3
3
+ size 33556293
model.layers.1.self_attn.o_proj/NVFP4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3ca3261ec1d6b02dde86ee497f7dfeea3f0bc63934f400eb674391b0f5c9da6
3
+ size 33556187
model.layers.1.self_attn.q_proj/FP8_Block.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5da85ebd784f19349018bd4bb883702d50637e056fd2b7d3f1f311a37c44b0e4
3
+ size 33556279
model.layers.1.self_attn.q_proj/FP8_Dynamic.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69d6c090098660296c7e48577d44889f1232764bc035662e7322f526bdc7ed60
3
+ size 33556293
model.layers.1.self_attn.q_proj/NVFP4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92bc1756877bef1ccc76df6306ad016d963678fbfc752d35a8b986ef49d31e77
3
+ size 33556187
model.layers.1.self_attn.v_proj/FP8_Block.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:073ede965bda0ca3bc10b1201b8c721d275cbeed6ec54b6754feb815a2171b47
3
+ size 8390455
model.layers.1.self_attn.v_proj/FP8_Dynamic.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d2d7b6efd362f1fb64c0d95d3a76ae61231d56bd0b65cc4523861781bd9357a
3
+ size 8390469
model.layers.1.self_attn.v_proj/NVFP4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c36dd7cf7552779e6a5ea331d9fb76ec342e0661e91b2b3a724fbb4483dbdfe
3
+ size 8390363
model.layers.10.mlp.down_proj/FP8_Block.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ea3b2d9dfd3c89690ede2337ea42509821a8903056fe5bac9014cb3e41bf5f5
3
+ size 117442359
model.layers.10.mlp.down_proj/FP8_Dynamic.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:416821f070b257ddf155a1ed6ec9fda0a98a14d21051c8c3178d49f48b176cc6
3
+ size 117442373
model.layers.10.mlp.down_proj/NVFP4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a820fc0fa4abe50f3334ccc2b5c204fa149ef119013769896b9016e4e43da27e
3
+ size 117442267
model.layers.10.mlp.gate_proj/FP8_Block.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9b94673fef98d0b627ea4aa1e720f7cd6d5431f3e22a522c2494f120cf1e08e
3
+ size 117442359
model.layers.10.mlp.gate_proj/FP8_Dynamic.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56d7b4ded6d9efd100d9c265b65bcfa2dc2ef135a21bd706105e598f17131d71
3
+ size 117442373
model.layers.10.mlp.gate_proj/NVFP4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:039514d63db0496aa1a8067fc5c66199cb28a0ba071be6fe06607bd4c9571a3a
3
+ size 117442267
model.layers.10.mlp.up_proj/FP8_Block.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68ec769ec3e170d376cfffdc718475908477aa8115d0e559d3654e52aa1060fd
3
+ size 117442359