"""Standalone inference for the Planktoscope phytoplankton classifier.

Requires: torch, timm, safetensors, torchvision, pillow, huggingface_hub.
"""
import json

import torch
import timm
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
from PIL import Image, ImageOps
import torchvision.transforms as T

REPO = "patcdaniel/planktoscope-phytoplankton-classifier"

# Read the published config; the context manager closes the file handle and
# the explicit encoding avoids depending on the platform default.
with open(hf_hub_download(REPO, "config.json"), encoding="utf-8") as _cfg_file:
    cfg = json.load(_cfg_file)


def _strip_prefix(state, prefix):
    """Return the entries of *state* whose key starts with *prefix*, prefix removed."""
    return {k[len(prefix):]: v for k, v in state.items() if k.startswith(prefix)}


# rebuild frozen DINOv2 backbone + linear head, load published weights
backbone = timm.create_model(
    "vit_small_patch14_dinov2.lvd142m",
    pretrained=False,
    num_classes=0,
    img_size=cfg["image_size"],
)
head = torch.nn.Linear(cfg["embedding_dim"], cfg["num_labels"])
sd = load_file(hf_hub_download(REPO, "model.safetensors"))
backbone.load_state_dict(_strip_prefix(sd, "backbone."))
head.load_state_dict(_strip_prefix(sd, "head."))
backbone.eval()
head.eval()

# EXACT preprocessing: pad to square (fill matches light ROI background)
# -> resize -> ImageNet norm
_prep_cfg = cfg["preprocessing"]
fill = tuple(_prep_cfg["pad_to_square_fill"])
size = cfg["image_size"]

# The pipeline depends only on config values, so build it once instead of on
# every preprocess() call.
_transform = T.Compose([
    T.Resize((size, size)),
    T.ToTensor(),
    T.Normalize(_prep_cfg["normalize_mean"], _prep_cfg["normalize_std"]),
])


def preprocess(img: Image.Image) -> torch.Tensor:
    """Pad *img* to a centered square, then resize and normalize it.

    The image is converted to RGB and pasted in the middle of a square canvas
    whose side is the longer image dimension and whose color is ``fill``; the
    canvas is then passed through the resize / to-tensor / normalize pipeline.

    Args:
        img: Input PIL image.

    Returns:
        The transformed image tensor (no batch dimension added).
    """
    w, h = img.size
    s = max(w, h)
    canvas = Image.new("RGB", (s, s), fill)
    canvas.paste(img.convert("RGB"), ((s - w) // 2, (s - h) // 2))
    return _transform(canvas)


@torch.no_grad()
def predict(path):
    """Classify the image at *path*.

    Args:
        path: Image location, passed to ``PIL.Image.open``.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` is looked up in
        ``cfg["id2label"]``, or is ``"Unassigned"`` when the top softmax
        probability is below ``cfg["recommended_min_confidence"]``;
        ``confidence`` is that top probability as a float.
    """
    # Close the underlying file once the pixels have been copied to the canvas.
    with Image.open(path) as img:
        x = preprocess(img).unsqueeze(0)
    probs = head(backbone(x)).softmax(1)[0]
    conf, idx = probs.max(0)
    label = cfg["id2label"][str(int(idx))]
    # abstain on low confidence (recommended for raw, mixed samples)
    if conf < cfg["recommended_min_confidence"]:
        return "Unassigned", float(conf)
    return label, float(conf)


print(predict("example.jpg"))