|
|
--- |
|
|
library_name: litert |
|
|
tags: |
|
|
- vision |
|
|
- image-classification |
|
|
datasets: |
|
|
- imagenet-1k |
|
|
model-index: |
|
|
- name: MobileNet V2 |
|
|
results: |
|
|
- task: |
|
|
type: image-classification |
|
|
name: Image Classification |
|
|
dataset: |
|
|
name: ImageNet-1k |
|
|
type: imagenet-1k |
|
|
config: default |
|
|
split: validation |
|
|
metrics: |
|
|
- name: Top 1 Accuracy (Full Precision) |
|
|
type: accuracy |
|
|
value: 0.7183 |
|
|
- name: Top 5 Accuracy (Full Precision) |
|
|
type: accuracy |
|
|
value: 0.9031 |
|
|
- name: Top 1 Accuracy (Dynamic Quantized wi8 afp32) |
|
|
type: accuracy |
|
|
value: 0.7183 |
|
|
- name: Top 5 Accuracy (Dynamic Quantized wi8 afp32) |
|
|
type: accuracy |
|
|
value: 0.9031 |
|
|
--- |
|
|
|
|
|
# MobileNet V2 |
|
|
|
|
|
MobileNet V2 model pre-trained on ImageNet-1k at resolution 224x224. It was introduced in [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) by Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen. |
|
|
|
|
|
|
|
|
## Model description |
|
|
|
|
|
The model was converted from a checkpoint from PyTorch Vision. |
|
|
|
|
|
The original model has:

- acc@1 (on ImageNet-1K): 71.878%
- acc@5 (on ImageNet-1K): 90.286%
- num_params: 3,504,872
|
|
|
|
|
No license information was provided with the original model.
|
|
|
|
|
|
|
|
## Use |
|
|
|
|
|
```python |
|
|
#!/usr/bin/env python3 |
|
|
import argparse, json |
|
|
import numpy as np |
|
|
from PIL import Image |
|
|
from huggingface_hub import hf_hub_download |
|
|
from ai_edge_litert.compiled_model import CompiledModel |
|
|
|
|
|
def preprocess(img: Image.Image) -> np.ndarray:
    """Convert a PIL image to a normalized CHW float32 array.

    Resizes the shorter side to 256, center-crops to 224x224, scales to
    [0, 1], applies the standard ImageNet mean/std normalization, and
    returns a channels-first (3, 224, 224) array.
    """
    rgb = img.convert("RGB")
    width, height = rgb.size
    short_side = 256

    # Resize so the shorter edge becomes 256, preserving aspect ratio.
    if width < height:
        target = (short_side, int(round(height * short_side / width)))
    else:
        target = (int(round(width * short_side / height)), short_side)
    rgb = rgb.resize(target, Image.BILINEAR)

    # Center crop to 224x224.
    crop_x = (rgb.size[0] - 224) // 2
    crop_y = (rgb.size[1] - 224) // 2
    rgb = rgb.crop((crop_x, crop_y, crop_x + 224, crop_y + 224))

    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    arr = np.asarray(rgb, dtype=np.float32) / 255.0
    arr = (arr - mean) / std

    # HWC -> CHW (channels-first), matching the PyTorch-derived checkpoint.
    return arr.transpose(2, 0, 1)
|
|
|
|
|
def main():
    """Classify one image with the LiteRT MobileNet V2 model and print the top-1 result."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--image", required=True)
    cli = parser.parse_args()

    # Fetch the compiled model and the ImageNet id->label mapping from the Hub.
    model_path = hf_hub_download("litert-community/MobileNet-v2", "mobilenet_v2.tflite")
    labels_path = hf_hub_download(
        "huggingface/label-files", "imagenet-1k-id2label.json", repo_type="dataset"
    )
    with open(labels_path, "r", encoding="utf-8") as fh:
        id2label = {int(idx): name for idx, name in json.load(fh).items()}

    tensor = preprocess(Image.open(cli.image))

    model = CompiledModel.from_file(model_path)
    input_buffers = model.create_input_buffers(0)
    output_buffers = model.create_output_buffers(0)

    input_buffers[0].write(tensor)
    model.run_by_index(0, input_buffers, output_buffers)

    # Read the full output tensor; size comes from the model's buffer requirements.
    reqs = model.get_output_buffer_requirements(0, 0)
    n_floats = reqs["buffer_size"] // np.dtype(np.float32).itemsize
    scores = output_buffers[0].read(n_floats, np.float32)

    top1 = int(np.argmax(scores))
    top1_label = id2label.get(top1, f"class_{top1}")

    print(f"Top-1 class index: {top1}")
    print(f"Top-1 label: {top1_label}")


if __name__ == "__main__":
    main()
|
|
``` |
|
|
|
|
|
### BibTeX entry and citation info |
|
|
|
|
|
```bibtex |
|
|
@inproceedings{mobilenetv22018, |
|
|
title={MobileNetV2: Inverted Residuals and Linear Bottlenecks}, |
|
|
author={Mark Sandler and Andrew Howard and Menglong Zhu and Andrey Zhmoginov and Liang-Chieh Chen}, |
|
|
booktitle={CVPR}, |
|
|
year={2018} |
|
|
} |
|
|
``` |