Koushik Dutta committed on
Commit ·
94a263a
1
Parent(s): 4629fa5
onnx
Browse files
export.py
CHANGED
|
@@ -5,6 +5,8 @@ from transformers.models.clip.modeling_clip import _get_vector_norm
|
|
| 5 |
import coremltools as ct
|
| 6 |
import torch
|
| 7 |
import numpy as np
|
|
|
|
|
|
|
| 8 |
|
| 9 |
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
| 10 |
|
|
@@ -94,6 +96,42 @@ def convert_onnx():
|
|
| 94 |
# convert_onnx()
|
| 95 |
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
def convert_openvino():
|
| 98 |
import openvino as ov
|
| 99 |
ov_vision_model = ov.convert_model(traced_vision_model, example_input=inputs.data['pixel_values'])
|
|
@@ -122,7 +160,7 @@ def infer_openvino():
|
|
| 122 |
|
| 123 |
print("similarity:", logits_per_text.item())
|
| 124 |
|
| 125 |
-
infer_openvino()
|
| 126 |
|
| 127 |
# convert_coreml()
|
| 128 |
|
|
|
|
| 5 |
import coremltools as ct
|
| 6 |
import torch
|
| 7 |
import numpy as np
|
| 8 |
+
import platform
|
| 9 |
+
import sys
|
| 10 |
|
| 11 |
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
| 12 |
|
|
|
|
| 96 |
# convert_onnx()
|
| 97 |
|
| 98 |
|
| 99 |
+
def infer_onnx():
|
| 100 |
+
import onnxruntime as ort
|
| 101 |
+
|
| 102 |
+
providers: list[str] = []
|
| 103 |
+
if sys.platform == "darwin":
|
| 104 |
+
providers.append("CoreMLExecutionProvider")
|
| 105 |
+
|
| 106 |
+
if ("linux" in sys.platform or "win" in sys.platform) and (
|
| 107 |
+
platform.machine() == "x86_64" or platform.machine() == "AMD64"
|
| 108 |
+
):
|
| 109 |
+
providers.append(("CUDAExecutionProvider", {"device_id": 0}))
|
| 110 |
+
|
| 111 |
+
providers.append("CPUExecutionProvider")
|
| 112 |
+
|
| 113 |
+
vision_session = ort.InferenceSession("vision.onnx", providers=providers)
|
| 114 |
+
text_session = ort.InferenceSession("text.onnx", providers=providers)
|
| 115 |
+
|
| 116 |
+
vision_inputs = {vision_session.get_inputs()[0].name: np_inputs['pixel_values']}
|
| 117 |
+
text_inputs = {
|
| 118 |
+
text_session.get_inputs()[0].name: np_inputs['input_ids'],
|
| 119 |
+
text_session.get_inputs()[1].name: np_inputs['attention_mask']
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
vision_predictions = vision_session.run(None, vision_inputs)
|
| 123 |
+
text_predictions = text_session.run(None, text_inputs)
|
| 124 |
+
|
| 125 |
+
image_embeds = vision_predictions[0]
|
| 126 |
+
text_embeds = text_predictions[0]
|
| 127 |
+
|
| 128 |
+
logits_per_text = text_embeds @ image_embeds.T
|
| 129 |
+
|
| 130 |
+
print("similarity:", logits_per_text.item())
|
| 131 |
+
|
| 132 |
+
infer_onnx()
|
| 133 |
+
|
| 134 |
+
|
| 135 |
def convert_openvino():
|
| 136 |
import openvino as ov
|
| 137 |
ov_vision_model = ov.convert_model(traced_vision_model, example_input=inputs.data['pixel_values'])
|
|
|
|
| 160 |
|
| 161 |
print("similarity:", logits_per_text.item())
|
| 162 |
|
| 163 |
+
# infer_openvino()
|
| 164 |
|
| 165 |
# convert_coreml()
|
| 166 |
|