# File size: 6,938 Bytes

# ed46e8d

"""
Evaluate a trained detection head on COCO val2017 using pycocotools mAP.

Usage:
    python eval_coco_map.py --checkpoint outputs/cofiber_threshold_full/head_final.pth --head cofiber_threshold
"""

import argparse
import json
import os
import sys
import time

import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
from torchvision.transforms import v2

# Put this script's own directory on the import path so that main() can
# import the `heads` and `utils.decode` modules.
sys.path.insert(0, os.path.dirname(__file__))

# Filesystem locations. Each default can be overridden through the
# environment variable named in the corresponding call.
EUPE_REPO = os.getenv("ARENA_BACKBONE_REPO", "/home/zootest/EUPE")
EUPE_WEIGHTS = os.getenv(
    "ARENA_BACKBONE_WEIGHTS",
    "/home/zootest/weights/eupe_vitb/EUPE-ViT-B.pt",
)
COCO_ROOT = os.getenv(
    "ARENA_COCO_ROOT",
    "/mnt/d/JacobProject/datasets/llava_instruct/coco",
)

# Side length, in pixels, of the square canvas that images are letterboxed to.
RESOLUTION = 640

# The backbone repository must be importable as well; it is inserted ahead of
# the script directory unless it is already present on the path.
if EUPE_REPO not in sys.path:
    sys.path.insert(0, EUPE_REPO)

# Lookup table indexed by the predicted label: entry i is the COCO
# `category_id` written to the results file for label i. The ids run from
# 1 to 90 with gaps, giving 80 entries in total.
COCO_CONTIG_TO_CAT = [
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    11, 13, 14, 15, 16, 17, 18, 19, 20, 21,
    22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
    35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
    46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
    56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
    67, 70, 72, 73, 74, 75, 76, 77, 78, 79,
    80, 81, 82, 84, 85, 86, 87, 88, 89, 90,
]


def letterbox(image, res):
    """Fit ``image`` into a ``res`` x ``res`` black canvas, keeping aspect ratio.

    The longer side of the image is scaled to ``res`` pixels. The resized
    image is pasted at the top-left corner of the canvas, so a coordinate on
    the canvas maps back to the original image by dividing by the returned
    scale; no offset has to be removed.

    Args:
        image: PIL image to resize. Only ``.size`` and ``.resize`` are used.
        res: Side length of the square output canvas, in pixels.

    Returns:
        A ``(canvas, scale)`` tuple: the ``res`` x ``res`` RGB canvas and the
        factor ``res / max(height, width)`` that was applied to the image.
    """
    W0, H0 = image.size
    scale = res / max(H0, W0)
    # For extreme aspect ratios the short side can round down to 0 pixels,
    # which resize() rejects; keep at least one pixel in each dimension.
    new_w = max(1, int(round(W0 * scale)))
    new_h = max(1, int(round(H0 * scale)))
    resized = image.resize((new_w, new_h), Image.BILINEAR)
    canvas = Image.new("RGB", (res, res), (0, 0, 0))
    canvas.paste(resized, (0, 0))
    return canvas, scale


def _artifact_path(checkpoint, suffix):
    """Return ``checkpoint`` with its file extension replaced by ``suffix``."""
    stem, _ext = os.path.splitext(checkpoint)
    return stem + suffix


def main():
    """Evaluate a detection-head checkpoint on COCO val2017 and report bbox mAP.

    Steps, in order:
      1. Parse the command line (``--checkpoint``, ``--head``,
         ``--score-thresh``, ``--max-images``).
      2. Load the backbone (frozen, eval mode) and the head on CUDA.
      3. Run inference image by image on a ``RESOLUTION`` letterboxed input
         and collect detections as COCO result dicts.
      4. Write ``<checkpoint stem>_coco_results.json``.
      5. Run pycocotools ``COCOeval`` and write
         ``<checkpoint stem>_coco_summary.json``.

    If no detections are produced, the (empty) results file is still written
    and the function returns before the evaluation step.

    Requires a CUDA device: models and inputs are moved with ``.cuda()``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--checkpoint", required=True)
    parser.add_argument("--head", default="cofiber_threshold")
    parser.add_argument("--score-thresh", type=float, default=0.05)
    parser.add_argument("--max-images", type=int, default=5000)
    args = parser.parse_args()

    from pycocotools.coco import COCO
    from pycocotools.cocoeval import COCOeval

    print("=" * 60)
    print(f"COCO mAP Evaluation: {args.head}")
    print("=" * 60)

    # Load backbone
    print("\nLoading backbone...")
    backbone = torch.hub.load(EUPE_REPO, "eupe_vitb16", source="local", weights=EUPE_WEIGHTS)
    backbone = backbone.cuda().eval()
    for p in backbone.parameters():
        p.requires_grad = False

    # Load head
    print(f"Loading head: {args.head}")
    from heads import get_head
    head = get_head(args.head)
    # NOTE(security): weights_only=False lets torch.load unpickle arbitrary
    # objects. Only point --checkpoint at files from a trusted source.
    state_dict = torch.load(args.checkpoint, map_location="cuda", weights_only=False)
    # Checkpoints may wrap the head weights under a "head" key.
    if "head" in state_dict:
        state_dict = state_dict["head"]
    head.load_state_dict(state_dict)
    head = head.cuda().eval()
    n_params = sum(p.numel() for p in head.parameters())
    print(f"  {n_params:,} params")

    # Imported once here instead of on every loop iteration.
    from utils.decode import decode_fcos

    # Precompute locations from a dummy feature map with the spatial size the
    # backbone is expected to produce for a RESOLUTION x RESOLUTION input.
    with torch.no_grad():
        dummy = torch.randn(1, 768, RESOLUTION // 16, RESOLUTION // 16, device="cuda")
        locs = head.get_locs(dummy)

    # Load COCO val
    ann_file = os.path.join(COCO_ROOT, "annotations", "instances_val2017.json")
    img_dir = os.path.join(COCO_ROOT, "val2017")
    coco_gt = COCO(ann_file)
    img_ids = sorted(coco_gt.getImgIds())[:args.max_images]
    print(f"  {len(img_ids)} val images")

    normalize = v2.Compose([
        v2.ToImage(), v2.ToDtype(torch.float32, scale=True),
        v2.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ])

    # Run inference
    print("\nRunning inference...")
    results = []
    t0 = time.time()

    for i, img_id in enumerate(img_ids):
        info = coco_gt.loadImgs(img_id)[0]
        with Image.open(os.path.join(img_dir, info["file_name"])) as raw:
            img = raw.convert("RGB")
        W0, H0 = img.size
        canvas, scale = letterbox(img, RESOLUTION)
        x = normalize(canvas).unsqueeze(0).cuda()

        with torch.no_grad():
            with torch.autocast("cuda", dtype=torch.bfloat16):
                out = backbone.forward_features(x)
            patches = out["x_norm_patchtokens"].float()
            B, N, D = patches.shape
            # The token sequence is reshaped into a square h x w grid.
            h = w = int(N ** 0.5)
            spatial = patches.permute(0, 2, 1).reshape(B, D, h, w)

            cls_l, reg_l, ctr_l = head(spatial)

        # Decode
        dets = decode_fcos(cls_l, reg_l, ctr_l, locs,
                           score_thresh=args.score_thresh, nms_thresh=0.5, max_det=100)

        for det in dets:
            # Undo the letterbox scaling. The image was pasted at the canvas
            # origin, so there is no offset to subtract.
            boxes = det["boxes"].cpu().numpy() / scale
            boxes[:, 0::2] = boxes[:, 0::2].clip(0, W0)
            boxes[:, 1::2] = boxes[:, 1::2].clip(0, H0)
            scores = det["scores"].cpu().numpy()
            labels = det["labels"].cpu().numpy()

            for box, score, label in zip(boxes, scores, labels):
                x1, y1, x2, y2 = box
                results.append({
                    "image_id": img_id,
                    "category_id": COCO_CONTIG_TO_CAT[int(label)],
                    # Corner format -> [x, y, width, height].
                    "bbox": [float(x1), float(y1), float(x2 - x1), float(y2 - y1)],
                    "score": float(score),
                })

        if (i + 1) % 500 == 0:
            elapsed = time.time() - t0
            print(f"  {i+1}/{len(img_ids)} ({elapsed:.0f}s, {(i+1)/elapsed:.1f} img/s)", flush=True)

    elapsed = time.time() - t0
    print(f"\nInference complete: {len(img_ids)} images, {len(results)} detections, {elapsed:.0f}s")

    # Save results. The path is derived from the checkpoint's stem so that a
    # checkpoint without a ".pth" extension is never overwritten.
    results_file = _artifact_path(args.checkpoint, "_coco_results.json")
    with open(results_file, "w") as f:
        json.dump(results, f)
    print(f"Saved: {results_file}")

    # Evaluate
    if not results:
        print("\nNo detections produced. mAP = 0.0")
        return

    print("\nRunning pycocotools evaluation...")
    coco_dt = coco_gt.loadRes(results_file)
    coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
    # Restrict scoring to the images that were actually processed.
    coco_eval.params.imgIds = img_ids
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    # Save summary
    summary = {
        "head": args.head,
        "params": n_params,
        "checkpoint": args.checkpoint,
        "n_images": len(img_ids),
        "n_detections": len(results),
        "mAP_0.5_0.95": float(coco_eval.stats[0]),
        "mAP_0.50": float(coco_eval.stats[1]),
        "mAP_0.75": float(coco_eval.stats[2]),
        "mAP_small": float(coco_eval.stats[3]),
        "mAP_medium": float(coco_eval.stats[4]),
        "mAP_large": float(coco_eval.stats[5]),
    }
    summary_file = _artifact_path(args.checkpoint, "_coco_summary.json")
    with open(summary_file, "w") as f:
        json.dump(summary, f, indent=2)
    print(f"\nSaved: {summary_file}")

    # The stats are fractions in [0, 1]; three decimals keep them readable
    # (one decimal would print 0.234 as "0.2").
    print(f"\n{'='*60}")
    print(f"  {args.head}: {n_params:,} params")
    print(f"  mAP@[0.5:0.95] = {summary['mAP_0.5_0.95']:.3f}")
    print(f"  mAP@0.50       = {summary['mAP_0.50']:.3f}")
    print(f"  mAP@0.75       = {summary['mAP_0.75']:.3f}")
    print(f"  mAP small      = {summary['mAP_small']:.3f}")
    print(f"  mAP medium     = {summary['mAP_medium']:.3f}")
    print(f"  mAP large      = {summary['mAP_large']:.3f}")
    print(f"{'='*60}")


# Run the evaluation only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()