import io
import time

import numpy as np
import onnxruntime as ort
from PIL import Image
from transformers import AutoConfig, AutoImageProcessor, ResNetForImageClassification
|
|
| |
# Image preprocessor for the "microsoft/resnet-18" checkpoint; used by
# run_inference to turn a PIL image into the "pixel_values" array.
processor = AutoImageProcessor.from_pretrained("microsoft/resnet-18")

# ONNX Runtime session options: a single intra-op and a single inter-op
# thread, with every graph optimization level enabled.
sess_options = ort.SessionOptions()
sess_options.intra_op_num_threads = 1
sess_options.inter_op_num_threads = 1
sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

# Inference session over the exported ONNX model file, CPU provider only.
session = ort.InferenceSession(
    "models/resnet18_quantized.onnx",
    sess_options=sess_options,
    providers=["CPUExecutionProvider"],
)

# Only the configuration is needed here (run_inference reads cfg.id2label),
# so load the config on its own instead of instantiating the full PyTorch
# model just to read its `.config` attribute.
cfg = AutoConfig.from_pretrained("microsoft/resnet-18")
|
|
def run_inference(image_bytes: bytes) -> dict:
    """Classify an encoded image with the ONNX session and return the top class.

    Args:
        image_bytes: Raw bytes of an encoded image; opened with PIL and
            converted to RGB before preprocessing.

    Returns:
        A dict with the keys:
            "label": class name looked up in ``cfg.id2label``.
            "score": softmax probability of the predicted class.
            "label_id": integer index of the largest logit.
            "inference_time_ms": wall-clock duration of ``session.run``
                alone, in milliseconds (decoding and preprocessing are
                not included).
    """
    img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    inputs = processor(images=img, return_tensors="np")
    pixel_values = inputs["pixel_values"].astype(np.float32)

    # Time only the model execution.
    t0 = time.perf_counter()
    outputs = session.run(["logits"], {"pixel_values": pixel_values})
    elapsed = (time.perf_counter() - t0) * 1000

    # A single image was submitted, so take the first row of the only
    # requested output ("logits").
    logits = outputs[0][0]
    predicted_class_id = int(np.argmax(logits))

    # Numerically stable softmax: subtracting the maximum logit leaves the
    # result mathematically unchanged but keeps np.exp from overflowing to
    # inf (which would turn the score into nan) when logits are large.
    exp_shifted = np.exp(logits - np.max(logits))
    score = float(exp_shifted[predicted_class_id] / np.sum(exp_shifted))

    return {
        "label": cfg.id2label[predicted_class_id],
        "score": score,
        "label_id": predicted_class_id,
        "inference_time_ms": elapsed,
    }
|
|