# detection-heads / eval_coco_map.py
# (Hugging Face page residue kept as comments so the file parses:)
# phanerozoic's picture
# Cofiber Threshold: trained weights, COCO mAP 4.0 from 70K params, eval script
# ed46e8d verified
"""
Evaluate a trained detection head on COCO val2017 using pycocotools mAP.
Usage:
python eval_coco_map.py --checkpoint outputs/cofiber_threshold_full/head_final.pth --head cofiber_threshold
"""
import argparse
import json
import os
import sys
import time
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
from torchvision.transforms import v2
sys.path.insert(0, os.path.dirname(__file__))
EUPE_REPO = os.environ.get("ARENA_BACKBONE_REPO", "/home/zootest/EUPE")
EUPE_WEIGHTS = os.environ.get("ARENA_BACKBONE_WEIGHTS", "/home/zootest/weights/eupe_vitb/EUPE-ViT-B.pt")
COCO_ROOT = os.environ.get("ARENA_COCO_ROOT", "/mnt/d/JacobProject/datasets/llava_instruct/coco")
RESOLUTION = 640
if EUPE_REPO not in sys.path:
sys.path.insert(0, EUPE_REPO)
COCO_CONTIG_TO_CAT = [
1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,
33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,
59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90,
]
def letterbox(image, res):
W0, H0 = image.size
scale = res / max(H0, W0)
new_w, new_h = int(round(W0 * scale)), int(round(H0 * scale))
resized = image.resize((new_w, new_h), Image.BILINEAR)
canvas = Image.new("RGB", (res, res), (0, 0, 0))
canvas.paste(resized, (0, 0))
return canvas, scale
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--checkpoint", required=True)
parser.add_argument("--head", default="cofiber_threshold")
parser.add_argument("--score-thresh", type=float, default=0.05)
parser.add_argument("--max-images", type=int, default=5000)
args = parser.parse_args()
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
print("=" * 60)
print(f"COCO mAP Evaluation: {args.head}")
print("=" * 60)
# Load backbone
print("\nLoading backbone...")
backbone = torch.hub.load(EUPE_REPO, "eupe_vitb16", source="local", weights=EUPE_WEIGHTS)
backbone = backbone.cuda().eval()
for p in backbone.parameters():
p.requires_grad = False
# Load head
print(f"Loading head: {args.head}")
from heads import get_head
head = get_head(args.head)
state_dict = torch.load(args.checkpoint, map_location="cuda", weights_only=False)
if "head" in state_dict:
state_dict = state_dict["head"]
head.load_state_dict(state_dict)
head = head.cuda().eval()
n_params = sum(p.numel() for p in head.parameters())
print(f" {n_params:,} params")
# Precompute locations
with torch.no_grad():
dummy = torch.randn(1, 768, RESOLUTION // 16, RESOLUTION // 16, device="cuda")
locs = head.get_locs(dummy)
# Load COCO val
ann_file = os.path.join(COCO_ROOT, "annotations", "instances_val2017.json")
img_dir = os.path.join(COCO_ROOT, "val2017")
coco_gt = COCO(ann_file)
img_ids = sorted(coco_gt.getImgIds())[:args.max_images]
print(f" {len(img_ids)} val images")
normalize = v2.Compose([
v2.ToImage(), v2.ToDtype(torch.float32, scale=True),
v2.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])
# Run inference
print("\nRunning inference...")
results = []
t0 = time.time()
for i, img_id in enumerate(img_ids):
info = coco_gt.loadImgs(img_id)[0]
img = Image.open(os.path.join(img_dir, info["file_name"])).convert("RGB")
W0, H0 = img.size
canvas, scale = letterbox(img, RESOLUTION)
x = normalize(canvas).unsqueeze(0).cuda()
with torch.no_grad():
with torch.autocast("cuda", dtype=torch.bfloat16):
out = backbone.forward_features(x)
patches = out["x_norm_patchtokens"].float()
B, N, D = patches.shape
h = w = int(N ** 0.5)
spatial = patches.permute(0, 2, 1).reshape(B, D, h, w)
cls_l, reg_l, ctr_l = head(spatial)
# Decode
from utils.decode import decode_fcos
dets = decode_fcos(cls_l, reg_l, ctr_l, locs,
score_thresh=args.score_thresh, nms_thresh=0.5, max_det=100)
for det in dets:
boxes = det["boxes"].cpu().numpy() / scale
boxes[:, 0::2] = boxes[:, 0::2].clip(0, W0)
boxes[:, 1::2] = boxes[:, 1::2].clip(0, H0)
scores = det["scores"].cpu().numpy()
labels = det["labels"].cpu().numpy()
for box, score, label in zip(boxes, scores, labels):
x1, y1, x2, y2 = box
results.append({
"image_id": img_id,
"category_id": COCO_CONTIG_TO_CAT[int(label)],
"bbox": [float(x1), float(y1), float(x2 - x1), float(y2 - y1)],
"score": float(score),
})
if (i + 1) % 500 == 0:
elapsed = time.time() - t0
print(f" {i+1}/{len(img_ids)} ({elapsed:.0f}s, {(i+1)/elapsed:.1f} img/s)", flush=True)
elapsed = time.time() - t0
print(f"\nInference complete: {len(img_ids)} images, {len(results)} detections, {elapsed:.0f}s")
# Save results
results_file = args.checkpoint.replace(".pth", "_coco_results.json")
with open(results_file, "w") as f:
json.dump(results, f)
print(f"Saved: {results_file}")
# Evaluate
if len(results) == 0:
print("\nNo detections produced. mAP = 0.0")
return
print("\nRunning pycocotools evaluation...")
coco_dt = coco_gt.loadRes(results_file)
coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
coco_eval.params.imgIds = img_ids
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
# Save summary
summary = {
"head": args.head,
"params": n_params,
"checkpoint": args.checkpoint,
"n_images": len(img_ids),
"n_detections": len(results),
"mAP_0.5_0.95": float(coco_eval.stats[0]),
"mAP_0.50": float(coco_eval.stats[1]),
"mAP_0.75": float(coco_eval.stats[2]),
"mAP_small": float(coco_eval.stats[3]),
"mAP_medium": float(coco_eval.stats[4]),
"mAP_large": float(coco_eval.stats[5]),
}
summary_file = args.checkpoint.replace(".pth", "_coco_summary.json")
with open(summary_file, "w") as f:
json.dump(summary, f, indent=2)
print(f"\nSaved: {summary_file}")
print(f"\n{'='*60}")
print(f" {args.head}: {n_params:,} params")
print(f" mAP@[0.5:0.95] = {summary['mAP_0.5_0.95']:.1f}")
print(f" mAP@0.50 = {summary['mAP_0.50']:.1f}")
print(f" mAP@0.75 = {summary['mAP_0.75']:.1f}")
print(f" mAP small = {summary['mAP_small']:.1f}")
print(f" mAP medium = {summary['mAP_medium']:.1f}")
print(f" mAP large = {summary['mAP_large']:.1f}")
print(f"{'='*60}")
if __name__ == "__main__":
main()