# ResNet-18 image classification worker: quantized ONNX model on CPU.
import io
import time

import numpy as np
import onnxruntime as ort
from PIL import Image
from transformers import AutoConfig, AutoImageProcessor, ResNetForImageClassification
# --- One-time per-process setup -------------------------------------------
# Preprocessing pipeline (resize/normalize) matching the ResNet-18 checkpoint.
processor = AutoImageProcessor.from_pretrained("microsoft/resnet-18")

# Tune the session for a multi-process serving environment: each worker
# process gets a single thread so workers don't oversubscribe the CPU.
sess_options = ort.SessionOptions()
sess_options.intra_op_num_threads = 1  # one compute thread per worker
sess_options.inter_op_num_threads = 1
sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

# Quantized ONNX model, CPU-only inference.
session = ort.InferenceSession(
    "models/resnet18_quantized.onnx",
    sess_options=sess_options,
    providers=["CPUExecutionProvider"],
)

# Label mapping (id2label) lives in the model config. Fetch only the config
# JSON instead of instantiating the full PyTorch model — the previous code
# downloaded and materialized every weight tensor just to read `.config`.
cfg = AutoConfig.from_pretrained("microsoft/resnet-18")
def run_inference(image_bytes: bytes) -> dict:
    """Classify a single image with the module-level ONNX session.

    Args:
        image_bytes: Raw encoded image bytes in any format PIL can decode.

    Returns:
        dict with keys:
            "label": human-readable class name from the model config,
            "score": softmax probability of the predicted class,
            "label_id": integer class index,
            "inference_time_ms": wall-clock time of the ONNX run in ms.

    Raises:
        PIL.UnidentifiedImageError: if the bytes are not a decodable image.
    """
    img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    inputs = processor(images=img, return_tensors="np")
    pixel_values = inputs["pixel_values"].astype(np.float32)

    t0 = time.perf_counter()
    outputs = session.run(["logits"], {"pixel_values": pixel_values})
    elapsed_ms = (time.perf_counter() - t0) * 1000

    logits = outputs[0][0]  # first output, first (only) batch element
    predicted_class_id = int(np.argmax(logits))

    # Numerically stable softmax: subtract the max logit before exponentiating
    # so large logits cannot overflow float32 (the naive exp(logits) form can
    # produce inf/inf == nan for confident predictions).
    shifted = np.exp(logits - np.max(logits))
    probs = shifted / np.sum(shifted)

    return {
        "label": cfg.id2label[predicted_class_id],
        "score": float(probs[predicted_class_id]),
        "label_id": predicted_class_id,
        "inference_time_ms": elapsed_ms,
    }