Koushik Dutta committed on
Commit ·
286d8f1
1
Parent(s): 45d0854
working quantized model
Browse files
- export.py +9 -4
- openvino/text_int8.bin +3 -0
- openvino/text_int8.xml +0 -0
- openvino/vision_int8.bin +3 -0
- openvino/vision_int8.xml +0 -0
export.py
CHANGED
|
@@ -180,13 +180,18 @@ def export_openvino_int8():
|
|
| 180 |
vision_dataset = nncf.Dataset(vision_calibration_dataset)
|
| 181 |
text_dataset = nncf.Dataset(text_calibration_dataset)
|
| 182 |
|
| 183 |
-
quantized_vision_model = nncf.quantize(ov_vision_model, vision_dataset, preset=nncf.QuantizationPreset.MIXED
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
|
| 186 |
ov.save_model(quantized_vision_model, "openvino/vision_int8.xml")
|
| 187 |
ov.save_model(quantized_text_model, "openvino/text_int8.xml")
|
| 188 |
|
| 189 |
-
|
| 190 |
|
| 191 |
def infer_openvino_int8():
|
| 192 |
import openvino as ov
|
|
@@ -246,7 +251,7 @@ def infer_ncnn():
|
|
| 246 |
|
| 247 |
print("similarity:", logits_per_text[0])
|
| 248 |
|
| 249 |
-
infer_ncnn()
|
| 250 |
|
| 251 |
def infer_torch():
|
| 252 |
outputs = ptmodel(**inputs)
|
|
|
|
| 180 |
vision_dataset = nncf.Dataset(vision_calibration_dataset)
|
| 181 |
text_dataset = nncf.Dataset(text_calibration_dataset)
|
| 182 |
|
| 183 |
+
quantized_vision_model = nncf.quantize(ov_vision_model, vision_dataset, preset=nncf.QuantizationPreset.MIXED, model_type=nncf.ModelType.TRANSFORMER,
|
| 184 |
+
# advanced_parameters=nncf.AdvancedQuantizationParameters(disable_bias_correction=True)
|
| 185 |
+
)
|
| 186 |
+
|
| 187 |
+
quantized_text_model = nncf.quantize(ov_text_model, text_dataset, preset=nncf.QuantizationPreset.MIXED, model_type=nncf.ModelType.TRANSFORMER,
|
| 188 |
+
# advanced_parameters=nncf.AdvancedQuantizationParameters(disable_bias_correction=True)
|
| 189 |
+
)
|
| 190 |
|
| 191 |
ov.save_model(quantized_vision_model, "openvino/vision_int8.xml")
|
| 192 |
ov.save_model(quantized_text_model, "openvino/text_int8.xml")
|
| 193 |
|
| 194 |
+
export_openvino_int8()
|
| 195 |
|
| 196 |
def infer_openvino_int8():
|
| 197 |
import openvino as ov
|
|
|
|
| 251 |
|
| 252 |
print("similarity:", logits_per_text[0])
|
| 253 |
|
| 254 |
+
# infer_ncnn()
|
| 255 |
|
| 256 |
def infer_torch():
|
| 257 |
outputs = ptmodel(**inputs)
|
openvino/text_int8.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad0d394e83a24bc1d2d9415f43a5847ccb804cb44715bbbc45f21b4190673d94
|
| 3 |
+
size 64105628
|
openvino/text_int8.xml
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
openvino/vision_int8.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:798edc4ba50811ec147985d57d601cf2744e706a6f9dbac0d2d66dd93f159087
|
| 3 |
+
size 88610018
|
openvino/vision_int8.xml
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|