File size: 2,045 Bytes
1345ccb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
"""Standalone inference for the Planktoscope phytoplankton classifier.
Requires: torch, timm, safetensors, torchvision, pillow, huggingface_hub.
"""
import json
import torch
import timm
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
from PIL import Image, ImageOps
import torchvision.transforms as T

REPO = "patcdaniel/planktoscope-phytoplankton-classifier"

# Read the published config through a context manager so the file handle is
# closed deterministically; JSON is UTF-8, so do not rely on the locale default.
with open(hf_hub_download(REPO, "config.json"), encoding="utf-8") as _cfg_file:
    cfg = json.load(_cfg_file)

# rebuild frozen DINOv2 backbone + linear head, load published weights
# (weights come from the checkpoint below, hence pretrained=False)
backbone = timm.create_model(
    "vit_small_patch14_dinov2.lvd142m",
    pretrained=False,
    num_classes=0,
    img_size=cfg["image_size"],
)
head = torch.nn.Linear(cfg["embedding_dim"], cfg["num_labels"])

# The checkpoint keeps both modules in one flat dict, namespaced by key prefix.
# Strip the prefix (by its actual length, not a hard-coded offset) so the keys
# match what each module's load_state_dict expects.
_BACKBONE_PREFIX = "backbone."
_HEAD_PREFIX = "head."
sd = load_file(hf_hub_download(REPO, "model.safetensors"))
backbone.load_state_dict(
    {k[len(_BACKBONE_PREFIX):]: v
     for k, v in sd.items() if k.startswith(_BACKBONE_PREFIX)}
)
head.load_state_dict(
    {k[len(_HEAD_PREFIX):]: v
     for k, v in sd.items() if k.startswith(_HEAD_PREFIX)}
)

# Switch both modules to evaluation mode for inference.
backbone.eval()
head.eval()

# EXACT preprocessing: pad to square (fill matches light ROI background) -> resize -> ImageNet norm
fill = tuple(cfg["preprocessing"]["pad_to_square_fill"])
size = cfg["image_size"]

# The resize/normalise pipeline depends only on the config, so it is built
# once here instead of being reconstructed on every preprocess() call.
_transform = T.Compose([
    T.Resize((size, size)),
    T.ToTensor(),
    T.Normalize(cfg["preprocessing"]["normalize_mean"],
                cfg["preprocessing"]["normalize_std"]),
])


def preprocess(img: Image.Image) -> torch.Tensor:
    """Pad *img* to a centred square, then resize and normalise it.

    Args:
        img: Input PIL image; it is converted to RGB before pasting.

    Returns:
        The tensor produced by resizing the padded RGB canvas to
        ``(size, size)``, converting it with ``ToTensor`` and applying the
        mean/std normalisation read from the config.
    """
    width, height = img.size
    side = max(width, height)
    # Paste onto a square canvas first so the resize below does not distort
    # the aspect ratio; the integer offsets centre the image on the canvas.
    canvas = Image.new("RGB", (side, side), fill)
    canvas.paste(img.convert("RGB"), ((side - width) // 2, (side - height) // 2))
    return _transform(canvas)

@torch.no_grad()
def predict(path):
    """Classify the image at *path*.

    Args:
        path: Anything accepted by ``PIL.Image.open``.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` is the entry of
        ``cfg["id2label"]`` for the highest-probability class, or
        ``"Unassigned"`` when that probability is below
        ``cfg["recommended_min_confidence"]``. ``confidence`` is the
        softmax probability of the top class as a Python float.
    """
    # Open via a context manager so the underlying file is released as soon
    # as the pixels have been copied into the input tensor.
    with Image.open(path) as img:
        x = preprocess(img).unsqueeze(0)  # add the batch dimension

    probs = head(backbone(x)).softmax(1)[0]
    conf, idx = probs.max(0)

    # abstain on low confidence (recommended for raw, mixed samples)
    if conf < cfg["recommended_min_confidence"]:
        return "Unassigned", float(conf)

    # id2label is keyed by the class index as a string.
    return cfg["id2label"][str(int(idx))], float(conf)

# Demo run: classify "example.jpg" (resolved against the current working
# directory) and print the resulting (label, confidence) tuple.
print(predict("example.jpg"))