# detection-heads / eval_coco_map.py
# (Hugging Face page residue kept as comments so the file parses:)
# phanerozoic's picture
# Cofiber Threshold: trained weights, COCO mAP 4.0 from 70K params, eval script
# ed46e8d verified
"""
Evaluate a trained detection head on COCO val2017 using pycocotools mAP.
Usage:
python eval_coco_map.py --checkpoint outputs/cofiber_threshold_full/head_final.pth --head cofiber_threshold
"""
import argparse
import json
import os
import sys
import time
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
from torchvision.transforms import v2
sys.path.insert(0, os.path.dirname(__file__))
EUPE_REPO = os.environ.get("ARENA_BACKBONE_REPO", "/home/zootest/EUPE")
EUPE_WEIGHTS = os.environ.get("ARENA_BACKBONE_WEIGHTS", "/home/zootest/weights/eupe_vitb/EUPE-ViT-B.pt")
COCO_ROOT = os.environ.get("ARENA_COCO_ROOT", "/mnt/d/JacobProject/datasets/llava_instruct/coco")
RESOLUTION = 640
if EUPE_REPO not in sys.path:
sys.path.insert(0, EUPE_REPO)
COCO_CONTIG_TO_CAT = [
1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,
33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,
59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90,
]
def letterbox(image, res):
W0, H0 = image.size
scale = res / max(H0, W0)
new_w, new_h = int(round(W0 * scale)), int(round(H0 * scale))
resized = image.resize((new_w, new_h), Image.BILINEAR)
canvas = Image.new("RGB", (res, res), (0, 0, 0))
canvas.paste(resized, (0, 0))
return canvas, scale
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--checkpoint", required=True)
parser.add_argument("--head", default="cofiber_threshold")
parser.add_argument("--score-thresh", type=float, default=0.05)
parser.add_argument("--max-images", type=int, default=5000)
args = parser.parse_args()
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
print("=" * 60)
print(f"COCO mAP Evaluation: {args.head}")
print("=" * 60)
# Load backbone
print("\nLoading backbone...")
backbone = torch.hub.load(EUPE_REPO, "eupe_vitb16", source="local", weights=EUPE_WEIGHTS)
backbone = backbone.cuda().eval()
for p in backbone.parameters():
p.requires_grad = False
# Load head
print(f"Loading head: {args.head}")
from heads import get_head
head = get_head(args.head)
state_dict = torch.load(args.checkpoint, map_location="cuda", weights_only=False)
if "head" in state_dict:
state_dict = state_dict["head"]
head.load_state_dict(state_dict)
head = head.cuda().eval()
n_params = sum(p.numel() for p in head.parameters())
print(f" {n_params:,} params")
# Precompute locations
with torch.no_grad():
dummy = torch.randn(1, 768, RESOLUTION // 16, RESOLUTION // 16, device="cuda")
locs = head.get_locs(dummy)
# Load COCO val
ann_file = os.path.join(COCO_ROOT, "annotations", "instances_val2017.json")
img_dir = os.path.join(COCO_ROOT, "val2017")
coco_gt = COCO(ann_file)
img_ids = sorted(coco_gt.getImgIds())[:args.max_images]
print(f" {len(img_ids)} val images")
normalize = v2.Compose([
v2.ToImage(), v2.ToDtype(torch.float32, scale=True),
v2.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])
# Run inference
print("\nRunning inference...")
results = []
t0 = time.time()
for i, img_id in enumerate(img_ids):
info = coco_gt.loadImgs(img_id)[0]
img = Image.open(os.path.join(img_dir, info["file_name"])).convert("RGB")
W0, H0 = img.size
canvas, scale = letterbox(img, RESOLUTION)
x = normalize(canvas).unsqueeze(0).cuda()
with torch.no_grad():
with torch.autocast("cuda", dtype=torch.bfloat16):
out = backbone.forward_features(x)
patches = out["x_norm_patchtokens"].float()
B, N, D = patches.shape
h = w = int(N ** 0.5)
spatial = patches.permute(0, 2, 1).reshape(B, D, h, w)
cls_l, reg_l, ctr_l = head(spatial)
# Decode
from utils.decode import decode_fcos
dets = decode_fcos(cls_l, reg_l, ctr_l, locs,
score_thresh=args.score_thresh, nms_thresh=0.5, max_det=100)
for det in dets:
boxes = det["boxes"].cpu().numpy() / scale
boxes[:, 0::2] = boxes[:, 0::2].clip(0, W0)
boxes[:, 1::2] = boxes[:, 1::2].clip(0, H0)
scores = det["scores"].cpu().numpy()
labels = det["labels"].cpu().numpy()
for box, score, label in zip(boxes, scores, labels):
x1, y1, x2, y2 = box
results.append({
"image_id": img_id,
"category_id": COCO_CONTIG_TO_CAT[int(label)],
"bbox": [float(x1), float(y1), float(x2 - x1), float(y2 - y1)],
"score": float(score),
})
if (i + 1) % 500 == 0:
elapsed = time.time() - t0
print(f" {i+1}/{len(img_ids)} ({elapsed:.0f}s, {(i+1)/elapsed:.1f} img/s)", flush=True)
elapsed = time.time() - t0
print(f"\nInference complete: {len(img_ids)} images, {len(results)} detections, {elapsed:.0f}s")
# Save results
results_file = args.checkpoint.replace(".pth", "_coco_results.json")
with open(results_file, "w") as f:
json.dump(results, f)
print(f"Saved: {results_file}")
# Evaluate
if len(results) == 0:
print("\nNo detections produced. mAP = 0.0")
return
print("\nRunning pycocotools evaluation...")
coco_dt = coco_gt.loadRes(results_file)
coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
coco_eval.params.imgIds = img_ids
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
# Save summary
summary = {
"head": args.head,
"params": n_params,
"checkpoint": args.checkpoint,
"n_images": len(img_ids),
"n_detections": len(results),
"mAP_0.5_0.95": float(coco_eval.stats[0]),
"mAP_0.50": float(coco_eval.stats[1]),
"mAP_0.75": float(coco_eval.stats[2]),
"mAP_small": float(coco_eval.stats[3]),
"mAP_medium": float(coco_eval.stats[4]),
"mAP_large": float(coco_eval.stats[5]),
}
summary_file = args.checkpoint.replace(".pth", "_coco_summary.json")
with open(summary_file, "w") as f:
json.dump(summary, f, indent=2)
print(f"\nSaved: {summary_file}")
print(f"\n{'='*60}")
print(f" {args.head}: {n_params:,} params")
print(f" mAP@[0.5:0.95] = {summary['mAP_0.5_0.95']:.1f}")
print(f" mAP@0.50 = {summary['mAP_0.50']:.1f}")
print(f" mAP@0.75 = {summary['mAP_0.75']:.1f}")
print(f" mAP small = {summary['mAP_small']:.1f}")
print(f" mAP medium = {summary['mAP_medium']:.1f}")
print(f" mAP large = {summary['mAP_large']:.1f}")
print(f"{'='*60}")
if __name__ == "__main__":
main()