|
|
| #include "cocoeval.h"
|
| #include <time.h>
|
| #include <algorithm>
|
| #include <cstdint>
|
| #include <numeric>
|
|
|
| using namespace pybind11::literals;
|
|
|
| namespace detectron2 {
|
|
|
| namespace COCOeval {
|
|
|
|
|
|
|
|
|
|
|
| void SortInstancesByDetectionScore(
|
| const std::vector<InstanceAnnotation>& detection_instances,
|
| std::vector<uint64_t>* detection_sorted_indices) {
|
| detection_sorted_indices->resize(detection_instances.size());
|
| std::iota(
|
| detection_sorted_indices->begin(), detection_sorted_indices->end(), 0);
|
| std::stable_sort(
|
| detection_sorted_indices->begin(),
|
| detection_sorted_indices->end(),
|
| [&detection_instances](size_t j1, size_t j2) {
|
| return detection_instances[j1].score > detection_instances[j2].score;
|
| });
|
| }
|
|
|
|
|
|
|
| void SortInstancesByIgnore(
|
| const std::array<double, 2>& area_range,
|
| const std::vector<InstanceAnnotation>& ground_truth_instances,
|
| std::vector<uint64_t>* ground_truth_sorted_indices,
|
| std::vector<bool>* ignores) {
|
| ignores->clear();
|
| ignores->reserve(ground_truth_instances.size());
|
| for (auto o : ground_truth_instances) {
|
| ignores->push_back(
|
| o.ignore || o.area < area_range[0] || o.area > area_range[1]);
|
| }
|
|
|
| ground_truth_sorted_indices->resize(ground_truth_instances.size());
|
| std::iota(
|
| ground_truth_sorted_indices->begin(),
|
| ground_truth_sorted_indices->end(),
|
| 0);
|
| std::stable_sort(
|
| ground_truth_sorted_indices->begin(),
|
| ground_truth_sorted_indices->end(),
|
| [&ignores](size_t j1, size_t j2) {
|
| return (int)(*ignores)[j1] < (int)(*ignores)[j2];
|
| });
|
| }
|
|
|
|
|
|
|
| void MatchDetectionsToGroundTruth(
|
| const std::vector<InstanceAnnotation>& detection_instances,
|
| const std::vector<uint64_t>& detection_sorted_indices,
|
| const std::vector<InstanceAnnotation>& ground_truth_instances,
|
| const std::vector<uint64_t>& ground_truth_sorted_indices,
|
| const std::vector<bool>& ignores,
|
| const std::vector<std::vector<double>>& ious,
|
| const std::vector<double>& iou_thresholds,
|
| const std::array<double, 2>& area_range,
|
| ImageEvaluation* results) {
|
|
|
| const int num_iou_thresholds = iou_thresholds.size();
|
| const int num_ground_truth = ground_truth_sorted_indices.size();
|
| const int num_detections = detection_sorted_indices.size();
|
| std::vector<uint64_t> ground_truth_matches(
|
| num_iou_thresholds * num_ground_truth, 0);
|
| std::vector<uint64_t>& detection_matches = results->detection_matches;
|
| std::vector<bool>& detection_ignores = results->detection_ignores;
|
| std::vector<bool>& ground_truth_ignores = results->ground_truth_ignores;
|
| detection_matches.resize(num_iou_thresholds * num_detections, 0);
|
| detection_ignores.resize(num_iou_thresholds * num_detections, false);
|
| ground_truth_ignores.resize(num_ground_truth);
|
| for (auto g = 0; g < num_ground_truth; ++g) {
|
| ground_truth_ignores[g] = ignores[ground_truth_sorted_indices[g]];
|
| }
|
|
|
| for (auto t = 0; t < num_iou_thresholds; ++t) {
|
| for (auto d = 0; d < num_detections; ++d) {
|
|
|
| double best_iou = std::min(iou_thresholds[t], 1 - 1e-10);
|
| int match = -1;
|
| for (auto g = 0; g < num_ground_truth; ++g) {
|
|
|
|
|
| if (ground_truth_matches[t * num_ground_truth + g] > 0 &&
|
| !ground_truth_instances[ground_truth_sorted_indices[g]].is_crowd) {
|
| continue;
|
| }
|
|
|
|
|
|
|
|
|
| if (match >= 0 && !ground_truth_ignores[match] &&
|
| ground_truth_ignores[g]) {
|
| break;
|
| }
|
|
|
|
|
| if (ious[d][ground_truth_sorted_indices[g]] >= best_iou) {
|
| best_iou = ious[d][ground_truth_sorted_indices[g]];
|
| match = g;
|
| }
|
| }
|
|
|
|
|
| if (match >= 0) {
|
| detection_ignores[t * num_detections + d] = ground_truth_ignores[match];
|
| detection_matches[t * num_detections + d] =
|
| ground_truth_instances[ground_truth_sorted_indices[match]].id;
|
| ground_truth_matches[t * num_ground_truth + match] =
|
| detection_instances[detection_sorted_indices[d]].id;
|
| }
|
|
|
|
|
| const InstanceAnnotation& detection =
|
| detection_instances[detection_sorted_indices[d]];
|
| detection_ignores[t * num_detections + d] =
|
| detection_ignores[t * num_detections + d] ||
|
| (detection_matches[t * num_detections + d] == 0 &&
|
| (detection.area < area_range[0] || detection.area > area_range[1]));
|
| }
|
| }
|
|
|
|
|
| results->detection_scores.resize(detection_sorted_indices.size());
|
| for (size_t d = 0; d < detection_sorted_indices.size(); ++d) {
|
| results->detection_scores[d] =
|
| detection_instances[detection_sorted_indices[d]].score;
|
| }
|
| }
|
|
|
| std::vector<ImageEvaluation> EvaluateImages(
|
| const std::vector<std::array<double, 2>>& area_ranges,
|
| int max_detections,
|
| const std::vector<double>& iou_thresholds,
|
| const ImageCategoryInstances<std::vector<double>>& image_category_ious,
|
| const ImageCategoryInstances<InstanceAnnotation>&
|
| image_category_ground_truth_instances,
|
| const ImageCategoryInstances<InstanceAnnotation>&
|
| image_category_detection_instances) {
|
| const int num_area_ranges = area_ranges.size();
|
| const int num_images = image_category_ground_truth_instances.size();
|
| const int num_categories =
|
| image_category_ious.size() > 0 ? image_category_ious[0].size() : 0;
|
| std::vector<uint64_t> detection_sorted_indices;
|
| std::vector<uint64_t> ground_truth_sorted_indices;
|
| std::vector<bool> ignores;
|
| std::vector<ImageEvaluation> results_all(
|
| num_images * num_area_ranges * num_categories);
|
|
|
|
|
|
|
| for (auto i = 0; i < num_images; ++i) {
|
| for (auto c = 0; c < num_categories; ++c) {
|
| const std::vector<InstanceAnnotation>& ground_truth_instances =
|
| image_category_ground_truth_instances[i][c];
|
| const std::vector<InstanceAnnotation>& detection_instances =
|
| image_category_detection_instances[i][c];
|
|
|
| SortInstancesByDetectionScore(
|
| detection_instances, &detection_sorted_indices);
|
| if ((int)detection_sorted_indices.size() > max_detections) {
|
| detection_sorted_indices.resize(max_detections);
|
| }
|
|
|
| for (size_t a = 0; a < area_ranges.size(); ++a) {
|
| SortInstancesByIgnore(
|
| area_ranges[a],
|
| ground_truth_instances,
|
| &ground_truth_sorted_indices,
|
| &ignores);
|
|
|
| MatchDetectionsToGroundTruth(
|
| detection_instances,
|
| detection_sorted_indices,
|
| ground_truth_instances,
|
| ground_truth_sorted_indices,
|
| ignores,
|
| image_category_ious[i][c],
|
| iou_thresholds,
|
| area_ranges[a],
|
| &results_all
|
| [c * num_area_ranges * num_images + a * num_images + i]);
|
| }
|
| }
|
| }
|
|
|
| return results_all;
|
| }
|
|
|
|
|
| template <typename T>
|
| std::vector<T> list_to_vec(const py::list& l) {
|
| std::vector<T> v(py::len(l));
|
| for (int i = 0; i < (int)py::len(l); ++i) {
|
| v[i] = l[i].cast<T>();
|
| }
|
| return v;
|
| }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| int BuildSortedDetectionList(
|
| const std::vector<ImageEvaluation>& evaluations,
|
| const int64_t evaluation_index,
|
| const int64_t num_images,
|
| const int max_detections,
|
| std::vector<uint64_t>* evaluation_indices,
|
| std::vector<double>* detection_scores,
|
| std::vector<uint64_t>* detection_sorted_indices,
|
| std::vector<uint64_t>* image_detection_indices) {
|
| assert(evaluations.size() >= evaluation_index + num_images);
|
|
|
|
|
|
|
| image_detection_indices->clear();
|
| evaluation_indices->clear();
|
| detection_scores->clear();
|
| image_detection_indices->reserve(num_images * max_detections);
|
| evaluation_indices->reserve(num_images * max_detections);
|
| detection_scores->reserve(num_images * max_detections);
|
| int num_valid_ground_truth = 0;
|
| for (auto i = 0; i < num_images; ++i) {
|
| const ImageEvaluation& evaluation = evaluations[evaluation_index + i];
|
|
|
| for (int d = 0;
|
| d < (int)evaluation.detection_scores.size() && d < max_detections;
|
| ++d) {
|
| evaluation_indices->push_back(evaluation_index + i);
|
| image_detection_indices->push_back(d);
|
| detection_scores->push_back(evaluation.detection_scores[d]);
|
| }
|
| for (auto ground_truth_ignore : evaluation.ground_truth_ignores) {
|
| if (!ground_truth_ignore) {
|
| ++num_valid_ground_truth;
|
| }
|
| }
|
| }
|
|
|
|
|
|
|
| detection_sorted_indices->resize(detection_scores->size());
|
| std::iota(
|
| detection_sorted_indices->begin(), detection_sorted_indices->end(), 0);
|
| std::stable_sort(
|
| detection_sorted_indices->begin(),
|
| detection_sorted_indices->end(),
|
| [&detection_scores](size_t j1, size_t j2) {
|
| return (*detection_scores)[j1] > (*detection_scores)[j2];
|
| });
|
|
|
| return num_valid_ground_truth;
|
| }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| void ComputePrecisionRecallCurve(
|
| const int64_t precisions_out_index,
|
| const int64_t precisions_out_stride,
|
| const int64_t recalls_out_index,
|
| const std::vector<double>& recall_thresholds,
|
| const int iou_threshold_index,
|
| const int num_iou_thresholds,
|
| const int num_valid_ground_truth,
|
| const std::vector<ImageEvaluation>& evaluations,
|
| const std::vector<uint64_t>& evaluation_indices,
|
| const std::vector<double>& detection_scores,
|
| const std::vector<uint64_t>& detection_sorted_indices,
|
| const std::vector<uint64_t>& image_detection_indices,
|
| std::vector<double>* precisions,
|
| std::vector<double>* recalls,
|
| std::vector<double>* precisions_out,
|
| std::vector<double>* scores_out,
|
| std::vector<double>* recalls_out) {
|
| assert(recalls_out->size() > recalls_out_index);
|
|
|
|
|
| int64_t true_positives_sum = 0, false_positives_sum = 0;
|
| precisions->clear();
|
| recalls->clear();
|
| precisions->reserve(detection_sorted_indices.size());
|
| recalls->reserve(detection_sorted_indices.size());
|
| assert(!evaluations.empty() || detection_sorted_indices.empty());
|
| for (auto detection_sorted_index : detection_sorted_indices) {
|
| const ImageEvaluation& evaluation =
|
| evaluations[evaluation_indices[detection_sorted_index]];
|
| const auto num_detections =
|
| evaluation.detection_matches.size() / num_iou_thresholds;
|
| const auto detection_index = iou_threshold_index * num_detections +
|
| image_detection_indices[detection_sorted_index];
|
| assert(evaluation.detection_matches.size() > detection_index);
|
| assert(evaluation.detection_ignores.size() > detection_index);
|
| const int64_t detection_match =
|
| evaluation.detection_matches[detection_index];
|
| const bool detection_ignores =
|
| evaluation.detection_ignores[detection_index];
|
| const auto true_positive = detection_match > 0 && !detection_ignores;
|
| const auto false_positive = detection_match == 0 && !detection_ignores;
|
| if (true_positive) {
|
| ++true_positives_sum;
|
| }
|
| if (false_positive) {
|
| ++false_positives_sum;
|
| }
|
|
|
| const double recall =
|
| static_cast<double>(true_positives_sum) / num_valid_ground_truth;
|
| recalls->push_back(recall);
|
| const int64_t num_valid_detections =
|
| true_positives_sum + false_positives_sum;
|
| const double precision = num_valid_detections > 0
|
| ? static_cast<double>(true_positives_sum) / num_valid_detections
|
| : 0.0;
|
| precisions->push_back(precision);
|
| }
|
|
|
| (*recalls_out)[recalls_out_index] = !recalls->empty() ? recalls->back() : 0;
|
|
|
| for (int64_t i = static_cast<int64_t>(precisions->size()) - 1; i > 0; --i) {
|
| if ((*precisions)[i] > (*precisions)[i - 1]) {
|
| (*precisions)[i - 1] = (*precisions)[i];
|
| }
|
| }
|
|
|
|
|
| for (size_t r = 0; r < recall_thresholds.size(); ++r) {
|
|
|
| std::vector<double>::iterator low = std::lower_bound(
|
| recalls->begin(), recalls->end(), recall_thresholds[r]);
|
| size_t precisions_index = low - recalls->begin();
|
|
|
| const auto results_ind = precisions_out_index + r * precisions_out_stride;
|
| assert(results_ind < precisions_out->size());
|
| assert(results_ind < scores_out->size());
|
| if (precisions_index < precisions->size()) {
|
| (*precisions_out)[results_ind] = (*precisions)[precisions_index];
|
| (*scores_out)[results_ind] =
|
| detection_scores[detection_sorted_indices[precisions_index]];
|
| } else {
|
| (*precisions_out)[results_ind] = 0;
|
| (*scores_out)[results_ind] = 0;
|
| }
|
| }
|
| }
|
| py::dict Accumulate(
|
| const py::object& params,
|
| const std::vector<ImageEvaluation>& evaluations) {
|
| const std::vector<double> recall_thresholds =
|
| list_to_vec<double>(params.attr("recThrs"));
|
| const std::vector<int> max_detections =
|
| list_to_vec<int>(params.attr("maxDets"));
|
| const int num_iou_thresholds = py::len(params.attr("iouThrs"));
|
| const int num_recall_thresholds = py::len(params.attr("recThrs"));
|
| const int num_categories = params.attr("useCats").cast<int>() == 1
|
| ? py::len(params.attr("catIds"))
|
| : 1;
|
| const int num_area_ranges = py::len(params.attr("areaRng"));
|
| const int num_max_detections = py::len(params.attr("maxDets"));
|
| const int num_images = py::len(params.attr("imgIds"));
|
|
|
| std::vector<double> precisions_out(
|
| num_iou_thresholds * num_recall_thresholds * num_categories *
|
| num_area_ranges * num_max_detections,
|
| -1);
|
| std::vector<double> recalls_out(
|
| num_iou_thresholds * num_categories * num_area_ranges *
|
| num_max_detections,
|
| -1);
|
| std::vector<double> scores_out(
|
| num_iou_thresholds * num_recall_thresholds * num_categories *
|
| num_area_ranges * num_max_detections,
|
| -1);
|
|
|
|
|
|
|
|
|
|
|
|
|
| std::vector<uint64_t> evaluation_indices;
|
| std::vector<double> detection_scores;
|
| std::vector<uint64_t> detection_sorted_indices;
|
|
|
| std::vector<uint64_t>
|
| image_detection_indices;
|
|
|
| std::vector<double> precisions, recalls;
|
|
|
| for (auto c = 0; c < num_categories; ++c) {
|
| for (auto a = 0; a < num_area_ranges; ++a) {
|
| for (auto m = 0; m < num_max_detections; ++m) {
|
|
|
|
|
|
|
|
|
| const int64_t evaluations_index =
|
| c * num_area_ranges * num_images + a * num_images;
|
| int num_valid_ground_truth = BuildSortedDetectionList(
|
| evaluations,
|
| evaluations_index,
|
| num_images,
|
| max_detections[m],
|
| &evaluation_indices,
|
| &detection_scores,
|
| &detection_sorted_indices,
|
| &image_detection_indices);
|
|
|
| if (num_valid_ground_truth == 0) {
|
| continue;
|
| }
|
|
|
| for (auto t = 0; t < num_iou_thresholds; ++t) {
|
|
|
|
|
|
|
| const int64_t recalls_out_index =
|
| t * num_categories * num_area_ranges * num_max_detections +
|
| c * num_area_ranges * num_max_detections +
|
| a * num_max_detections + m;
|
|
|
|
|
|
|
|
|
| const int64_t precisions_out_stride =
|
| num_categories * num_area_ranges * num_max_detections;
|
| const int64_t precisions_out_index = t * num_recall_thresholds *
|
| num_categories * num_area_ranges * num_max_detections +
|
| c * num_area_ranges * num_max_detections +
|
| a * num_max_detections + m;
|
|
|
| ComputePrecisionRecallCurve(
|
| precisions_out_index,
|
| precisions_out_stride,
|
| recalls_out_index,
|
| recall_thresholds,
|
| t,
|
| num_iou_thresholds,
|
| num_valid_ground_truth,
|
| evaluations,
|
| evaluation_indices,
|
| detection_scores,
|
| detection_sorted_indices,
|
| image_detection_indices,
|
| &precisions,
|
| &recalls,
|
| &precisions_out,
|
| &scores_out,
|
| &recalls_out);
|
| }
|
| }
|
| }
|
| }
|
|
|
| time_t rawtime;
|
| struct tm local_time;
|
| std::array<char, 200> buffer;
|
| time(&rawtime);
|
| #ifdef _WIN32
|
| localtime_s(&local_time, &rawtime);
|
| #else
|
| localtime_r(&rawtime, &local_time);
|
| #endif
|
| strftime(
|
| buffer.data(), 200, "%Y-%m-%d %H:%num_max_detections:%S", &local_time);
|
| return py::dict(
|
| "params"_a = params,
|
| "counts"_a = std::vector<int64_t>(
|
| {num_iou_thresholds,
|
| num_recall_thresholds,
|
| num_categories,
|
| num_area_ranges,
|
| num_max_detections}),
|
| "date"_a = buffer,
|
| "precision"_a = precisions_out,
|
| "recall"_a = recalls_out,
|
| "scores"_a = scores_out);
|
| }
|
|
|
| }
|
|
|
| }
|
|
|