| | |
| | |
| |
|
| | import logging |
| | import numpy as np |
| | import os |
| | import tempfile |
| | import xml.etree.ElementTree as ET |
| | from collections import OrderedDict, defaultdict |
| | from functools import lru_cache |
| | import torch |
| |
|
| | from detectron2.data import MetadataCatalog |
| | from detectron2.utils import comm |
| | from detectron2.utils.file_io import PathManager |
| |
|
| | from .evaluator import DatasetEvaluator |
| |
|
| |
|
class PascalVOCDetectionEvaluator(DatasetEvaluator):
    """
    Evaluate Pascal VOC style AP for Pascal VOC dataset.
    It contains a synchronization, therefore has to be called from all ranks.

    Note that the concept of AP can be implemented in different ways and may not
    produce identical results. This class mimics the implementation of the official
    Pascal VOC Matlab API, and should produce similar but not identical results to the
    official API.
    """

    def __init__(self, dataset_name):
        """
        Args:
            dataset_name (str): name of the dataset, e.g., "voc_2007_test"
        """
        self._dataset_name = dataset_name
        meta = MetadataCatalog.get(dataset_name)

        # Resolve the annotation directory through PathManager once; the
        # per-image XML template is then built on the returned path.
        annotation_dir_local = PathManager.get_local_path(
            os.path.join(meta.dirname, "Annotations/")
        )
        self._anno_file_template = os.path.join(annotation_dir_local, "{}.xml")
        self._image_set_path = os.path.join(meta.dirname, "ImageSets", "Main", meta.split + ".txt")
        self._class_names = meta.thing_classes
        assert meta.year in [2007, 2012], meta.year
        # The year only selects which AP formula voc_eval is asked to use.
        self._is_2007 = meta.year == 2007
        self._cpu_device = torch.device("cpu")
        self._logger = logging.getLogger(__name__)
        # Start with an empty buffer so that process() works even when the
        # caller did not invoke reset() first.
        self._predictions = defaultdict(list)

    def reset(self):
        """Discard all predictions accumulated so far."""
        # Maps class id -> list of detection lines (see process()).
        self._predictions = defaultdict(list)

    def process(self, inputs, outputs):
        """
        Record the detections of one batch.

        Args:
            inputs: iterable of dicts; only the "image_id" entry is read.
            outputs: iterable of dicts, aligned with ``inputs``; the "instances"
                entry must provide ``pred_boxes``, ``scores`` and ``pred_classes``.

        Every detection is stored, under its predicted class id, as the text line
        ``"<image_id> <score> <xmin> <ymin> <xmax> <ymax>"``.
        """
        for sample, output in zip(inputs, outputs):
            image_id = sample["image_id"]
            instances = output["instances"].to(self._cpu_device)
            boxes = instances.pred_boxes.tensor.numpy()
            scores = instances.scores.tolist()
            classes = instances.pred_classes.tolist()
            for box, score, cls in zip(boxes, scores, classes):
                xmin, ymin, xmax, ymax = box
                # NOTE(review): the top-left corner is shifted by one pixel;
                # presumably this undoes a -1 shift applied when the dataset
                # is loaded -- confirm against the dataset loader.
                xmin += 1
                ymin += 1
                self._predictions[cls].append(
                    f"{image_id} {score:.3f} {xmin:.1f} {ymin:.1f} {xmax:.1f} {ymax:.1f}"
                )

    def evaluate(self):
        """
        Gather the predictions of all ranks and compute the AP metrics.

        Returns:
            OrderedDict or None: on the main process, a dict with a single key
            "bbox" whose value is a dict of "AP", "AP50", and "AP75" (all scaled
            to 0-100). "AP" is the mean over the IoU thresholds 0.50, 0.55, ...,
            0.95 of the per-threshold mean AP across classes. Every other rank
            returns None.
        """
        all_predictions = comm.gather(self._predictions, dst=0)
        if not comm.is_main_process():
            return
        # Merge the per-rank buffers into one class id -> lines mapping.
        predictions = defaultdict(list)
        for predictions_per_rank in all_predictions:
            for clsid, lines in predictions_per_rank.items():
                predictions[clsid].extend(lines)
        del all_predictions

        self._logger.info(
            "Evaluating {} using {} metric. "
            "Note that results do not use the official Matlab API.".format(
                self._dataset_name, 2007 if self._is_2007 else 2012
            )
        )

        # voc_eval reads detections from disk, so each class gets its own
        # result file inside a throw-away directory.
        with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as dirname:
            res_file_template = os.path.join(dirname, "{}.txt")

            aps = defaultdict(list)  # IoU threshold (in percent) -> per-class AP
            for cls_id, cls_name in enumerate(self._class_names):
                # A class without detections still gets an (empty) result file.
                lines = predictions.get(cls_id, [""])

                with open(res_file_template.format(cls_name), "w") as f:
                    f.write("\n".join(lines))

                for thresh in range(50, 100, 5):
                    rec, prec, ap = voc_eval(
                        res_file_template,
                        self._anno_file_template,
                        self._image_set_path,
                        cls_name,
                        ovthresh=thresh / 100.0,
                        use_07_metric=self._is_2007,
                    )
                    aps[thresh].append(ap * 100)

        ret = OrderedDict()
        mAP = {iou: np.mean(x) for iou, x in aps.items()}
        ret["bbox"] = {"AP": np.mean(list(mAP.values())), "AP50": mAP[50], "AP75": mAP[75]}
        return ret
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | """Python implementation of the PASCAL VOC devkit's AP evaluation code.""" |
| |
|
| |
|
@lru_cache(maxsize=None)
def parse_rec(filename):
    """Parse a PASCAL VOC xml file.

    Args:
        filename (str): path of the annotation file, opened through PathManager.

    Returns:
        list[dict]: one dict per ``<object>`` element, with the keys "name",
        "pose", "truncated", "difficult" and "bbox" (``[xmin, ymin, xmax, ymax]``
        as ints). Results are memoized per filename, so the same list object is
        handed out on repeated calls.
    """
    with PathManager.open(filename) as xml_file:
        annotation = ET.parse(xml_file)

    def _to_record(node):
        # Build the per-object dict in the same key order as the XML is read.
        record = {
            "name": node.find("name").text,
            "pose": node.find("pose").text,
            "truncated": int(node.find("truncated").text),
            "difficult": int(node.find("difficult").text),
        }
        box_node = node.find("bndbox")
        record["bbox"] = [
            int(box_node.find(corner).text) for corner in ("xmin", "ymin", "xmax", "ymax")
        ]
        return record

    return [_to_record(node) for node in annotation.findall("object")]
| |
|
| |
|
def voc_ap(rec, prec, use_07_metric=False):
    """Compute VOC AP given precision and recall.

    Args:
        rec (np.ndarray): recall values, one per ranked detection.
        prec (np.ndarray): precision values, aligned with ``rec``.
        use_07_metric (bool): if true, use the VOC 07 11-point method
            (default: False); otherwise integrate the area under the
            monotonically decreasing precision envelope.

    Returns:
        float: the average precision.
    """
    if use_07_metric:
        # 11-point metric: average the best precision reachable at each of the
        # recall levels 0.0, 0.1, ..., 1.0.
        ap = 0.0
        for level in np.arange(0.0, 1.1, 0.1):
            reached = rec >= level
            best_prec = np.max(prec[reached]) if np.sum(reached) != 0 else 0
            ap = ap + best_prec / 11.0
        return ap

    # Exact area under the curve: pad both ends with sentinel values first.
    mrec = np.concatenate(([0.0], rec, [1.0]))
    mpre = np.concatenate(([0.0], prec, [0.0]))

    # Precision envelope: every entry becomes the maximum of itself and all
    # entries to its right (a running maximum taken from the right end).
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]

    # Only positions where recall changes contribute to the sum.
    steps = np.where(mrec[1:] != mrec[:-1])[0]

    # Sum of (delta recall) * precision.
    return np.sum((mrec[steps + 1] - mrec[steps]) * mpre[steps + 1])
| |
|
| |
|
def voc_eval(detpath, annopath, imagesetfile, classname, ovthresh=0.5, use_07_metric=False):
    """rec, prec, ap = voc_eval(detpath,
                                annopath,
                                imagesetfile,
                                classname,
                                [ovthresh],
                                [use_07_metric])

    Top level function that does the PASCAL VOC evaluation.

    detpath: Path to detections
        detpath.format(classname) should produce the detection results file.
    annopath: Path to annotations
        annopath.format(imagename) should be the xml annotations file.
    imagesetfile: Text file containing the list of images, one image per line.
    classname: Category name
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use VOC07's 11 point AP computation
        (default False)

    Returns the cumulative recall array, the cumulative precision array (one
    entry per detection, ordered by decreasing confidence) and the AP value.
    """
    # Image names, one per line of the image-set file.
    with PathManager.open(imagesetfile, "r") as listing:
        image_names = [raw.strip() for raw in listing.readlines()]

    # Per image: ground-truth boxes of this class, their "difficult" flags and
    # a "det" list marking which boxes have already been matched.
    gt_by_image = {}
    npos = 0  # number of non-difficult ground-truth boxes
    for image_name in image_names:
        objects = [
            obj for obj in parse_rec(annopath.format(image_name)) if obj["name"] == classname
        ]
        hard_flags = np.array([obj["difficult"] for obj in objects]).astype(bool)
        npos = npos + sum(~hard_flags)
        gt_by_image[image_name] = {
            "bbox": np.array([obj["bbox"] for obj in objects]),
            "difficult": hard_flags,
            "det": [False] * len(objects),
        }

    # Detection file: "<image_id> <confidence> <xmin> <ymin> <xmax> <ymax>" per line.
    with open(detpath.format(classname), "r") as det_file:
        fields = [row.strip().split(" ") for row in det_file.readlines()]

    image_ids = [entry[0] for entry in fields]
    confidence = np.array([float(entry[1]) for entry in fields])
    BB = np.array([[float(value) for value in entry[2:]] for entry in fields]).reshape(-1, 4)

    # Visit detections from the most to the least confident.
    order = np.argsort(-confidence)
    BB = BB[order, :]
    image_ids = [image_ids[idx] for idx in order]

    num_dets = len(image_ids)
    tp = np.zeros(num_dets)
    fp = np.zeros(num_dets)
    for rank, image_id in enumerate(image_ids):
        gt = gt_by_image[image_id]
        det_box = BB[rank, :].astype(float)
        gt_boxes = gt["bbox"].astype(float)
        best_iou = -np.inf

        if gt_boxes.size > 0:
            # Intersection with every ground-truth box; the +1.0 terms follow
            # the inclusive pixel-coordinate convention used throughout.
            ixmin = np.maximum(gt_boxes[:, 0], det_box[0])
            iymin = np.maximum(gt_boxes[:, 1], det_box[1])
            ixmax = np.minimum(gt_boxes[:, 2], det_box[2])
            iymax = np.minimum(gt_boxes[:, 3], det_box[3])
            inter_w = np.maximum(ixmax - ixmin + 1.0, 0.0)
            inter_h = np.maximum(iymax - iymin + 1.0, 0.0)
            inters = inter_w * inter_h

            # Union = area(detection) + area(ground truth) - intersection.
            det_area = (det_box[2] - det_box[0] + 1.0) * (det_box[3] - det_box[1] + 1.0)
            gt_area = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1.0) * (
                gt_boxes[:, 3] - gt_boxes[:, 1] + 1.0
            )
            uni = det_area + gt_area - inters

            overlaps = inters / uni
            best_iou = np.max(overlaps)
            best_gt = np.argmax(overlaps)

        # Written as "not (a > b)" on purpose: a NaN overlap must still be
        # counted as a false positive.
        if not (best_iou > ovthresh):
            fp[rank] = 1.0
            continue

        # A match with a "difficult" box counts neither as TP nor as FP.
        if gt["difficult"][best_gt]:
            continue

        if gt["det"][best_gt]:
            # This ground-truth box was already claimed by a more confident detection.
            fp[rank] = 1.0
        else:
            tp[rank] = 1.0
            gt["det"][best_gt] = 1

    # Cumulative counts turn the per-detection flags into a PR curve.
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # Clamp the denominator so an all-zero prefix does not divide by zero.
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap
| |
|