File size: 5,484 Bytes
1161bb5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env python3
import argparse
import torch
from PIL import Image
from torchvision import datasets, transforms


# ImageNet channel-wise normalization statistics, RGB order (standard
# torchvision values used by most pretrained classifiers).
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
# Fallback wnid -> output-index table used when no --val_dir is supplied.
# Covers the 101 WordNet ids this model was trained on, indices 0-100.
HARDCODED_WNID_TO_IDX_101 = {'n01440764': 0, 'n01443537': 1, 'n01484850': 2, 'n01491361': 3, 'n01494475': 4, 'n01496331': 5, 'n01498041': 6, 'n01514668': 7, 'n01514859': 8, 'n01531178': 9, 'n01537544': 10, 'n01560419': 11, 'n01582220': 12, 'n01592084': 13, 'n01601694': 14, 'n01608432': 15, 'n01614925': 16, 'n01622779': 17, 'n01630670': 18, 'n01632458': 19, 'n01632777': 20, 'n01644900': 21, 'n01664065': 22, 'n01665541': 23, 'n01667114': 24, 'n01667778': 25, 'n01675722': 26, 'n01677366': 27, 'n01685808': 28, 'n01687978': 29, 'n01693334': 30, 'n01695060': 31, 'n01698640': 32, 'n01728572': 33, 'n01729322': 34, 'n01729977': 35, 'n01734418': 36, 'n01735189': 37, 'n01739381': 38, 'n01740131': 39, 'n01742172': 40, 'n01749939': 41, 'n01751748': 42, 'n01753488': 43, 'n01755581': 44, 'n01756291': 45, 'n01770081': 46, 'n01770393': 47, 'n01773157': 48, 'n01773549': 49, 'n01773797': 50, 'n01774384': 51, 'n01774750': 52, 'n01775062': 53, 'n01776313': 54, 'n01795545': 55, 'n01796340': 56, 'n01798484': 57, 'n01806143': 58, 'n01818515': 59, 'n01819313': 60, 'n01820546': 61, 'n01824575': 62, 'n01828970': 63, 'n01829413': 64, 'n01833805': 65, 'n01843383': 66, 'n01847000': 67, 'n01855672': 68, 'n01860187': 69, 'n01877812': 70, 'n01883070': 71, 'n01910747': 72, 'n01914609': 73, 'n01924916': 74, 'n01930112': 75, 'n01943899': 76, 'n01944390': 77, 'n01950731': 78, 'n01955084': 79, 'n01968897': 80, 'n01978287': 81, 'n01978455': 82, 'n01984695': 83, 'n01985128': 84, 'n01986214': 85, 'n02002556': 86, 'n02006656': 87, 'n02007558': 88, 'n02011460': 89, 'n02012849': 90, 'n02013706': 91, 'n02018207': 92, 'n02018795': 93, 'n02027492': 94, 'n02028035': 95, 'n02037110': 96, 'n02051845': 97, 'n02058221': 98, 'n02077923': 99, 'n02391049': 100}


def preprocess_image(image_path, input_image_size):
    """Load an image and convert it to a normalized NCHW float tensor.

    Args:
        image_path: Path to the image file on disk.
        input_image_size: Target square side length in pixels.

    Returns:
        torch.Tensor of shape (1, 3, input_image_size, input_image_size),
        normalized with the ImageNet mean/std constants.
    """
    image = Image.open(image_path).convert("RGB")
    transform_list = []
    # Skip the resize/crop stage only when the image is already exactly the
    # target square size (PIL's .size is a (width, height) tuple).
    if image.size != (input_image_size, input_image_size):
        transform_list.extend([
            # Use the torchvision InterpolationMode enum: passing the raw
            # PIL constant (Image.BICUBIC) is deprecated in torchvision and
            # rejected by newer releases.
            transforms.Resize(
                input_image_size,
                interpolation=transforms.InterpolationMode.BICUBIC,
            ),
            transforms.CenterCrop(input_image_size),
        ])
    transform_list.extend([
        transforms.ToTensor(),
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ])
    transform = transforms.Compose(transform_list)
    # unsqueeze(0) adds the batch dimension the model expects.
    tensor = transform(image).unsqueeze(0)
    return tensor


def load_wnid_to_name(cls_map_path):
    """Parse a cls_map file into a {wnid: human-readable name} dict.

    Each useful line looks like "wnid index name words...": the first token
    is the WordNet id, the second token is ignored, and everything after
    that is joined into the class name.  Returns None when no path is given
    or when no mapping could be parsed from the file.
    """
    if not cls_map_path:
        return None
    mapping = {}
    with open(cls_map_path, "r", encoding="utf-8") as fh:
        for raw in fh:
            tokens = raw.strip().split()
            # Need at least wnid, an index token, and one name token.
            if len(tokens) >= 3:
                mapping[tokens[0]] = " ".join(tokens[2:])
    return mapping or None


def load_idx_to_name_from_val_dir(val_dir, wnid_to_name):
    """Build a {class index: display name} map for the model's outputs.

    When val_dir is given, the index -> wnid assignment is derived from an
    ImageFolder over that directory; otherwise the hard-coded 101-class
    table is used.  Display names come from wnid_to_name when available,
    falling back to the wnid itself.  Returns None for an empty mapping.
    """
    if val_dir:
        idx_to_wnid = {
            idx: wnid
            for wnid, idx in datasets.ImageFolder(val_dir).class_to_idx.items()
        }
    else:
        idx_to_wnid = {
            idx: wnid for wnid, idx in HARDCODED_WNID_TO_IDX_101.items()
        }
    if wnid_to_name:
        idx_to_name = {
            idx: wnid_to_name.get(wnid, wnid)
            for idx, wnid in idx_to_wnid.items()
        }
    else:
        idx_to_name = dict(idx_to_wnid)
    return idx_to_name or None


def topk_from_logits(logits, idx_to_class, k=5):
    """Return the top-k predictions as (index, class name, probability) tuples.

    logits is a (1, num_classes) tensor; probabilities come from a softmax
    over dim 1.  idx_to_class maps int index -> display name; when it is
    falsy, or misses an index, the stringified index is used instead.
    """
    scores, classes = torch.softmax(logits, dim=1).topk(k, dim=1)
    ranked = []
    for prob, cls_idx in zip(scores.squeeze(0).tolist(),
                             classes.squeeze(0).tolist()):
        if idx_to_class:
            label = idx_to_class.get(cls_idx, str(cls_idx))
        else:
            label = str(cls_idx)
        ranked.append((cls_idx, label, prob))
    return ranked


def main():
    """CLI entry point: run a TorchScript model on one image, print top-k."""
    parser = argparse.ArgumentParser(description="TorchScript single-image inference.")
    parser.add_argument("--torchscript", type=str, required=True,
                        help="Path to TorchScript .pt file")
    parser.add_argument("--image_path", type=str, required=True,
                        help="Path to input image")
    parser.add_argument("--input_image_size", type=int, default=224)
    parser.add_argument("--val_dir", type=str,
                        default=None,
                        help="Val dir to derive class index -> wnid mapping")
    parser.add_argument("--cls_map_path", type=str,
                        default="/scratch/general/vast/j.yan/nas_tvm/cls_map.txt",
                        help="Path to cls_map.txt for wnid -> class name mapping")
    parser.add_argument("--topk", type=int, default=1)
    args = parser.parse_args()

    # Build the class-name lookup tables first (either may end up None,
    # in which case raw indices/wnids are printed).
    wnid_names = load_wnid_to_name(args.cls_map_path)
    class_names = load_idx_to_name_from_val_dir(args.val_dir, wnid_names)

    batch = preprocess_image(args.image_path, args.input_image_size)

    net = torch.jit.load(args.torchscript, map_location="cpu")
    net.eval()
    with torch.no_grad():
        logits = net(batch)

    print("[torchscript] top-{}:".format(args.topk))
    for _, label, prob in topk_from_logits(logits, class_names, k=args.topk):
        print(f" class={label} prob={prob:.6f}")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()