| | |
| | #include "cocoeval.h" |
| | #include <time.h> |
| | #include <algorithm> |
| | #include <cstdint> |
| | #include <numeric> |
| |
|
| | using namespace pybind11::literals; |
| |
|
| | namespace detectron2 { |
| |
|
| | namespace COCOeval { |
| |
|
| | |
| | |
| | |
| | |
| | void SortInstancesByDetectionScore( |
| | const std::vector<InstanceAnnotation>& detection_instances, |
| | std::vector<uint64_t>* detection_sorted_indices) { |
| | detection_sorted_indices->resize(detection_instances.size()); |
| | std::iota( |
| | detection_sorted_indices->begin(), detection_sorted_indices->end(), 0); |
| | std::stable_sort( |
| | detection_sorted_indices->begin(), |
| | detection_sorted_indices->end(), |
| | [&detection_instances](size_t j1, size_t j2) { |
| | return detection_instances[j1].score > detection_instances[j2].score; |
| | }); |
| | } |
| |
|
| | |
| | |
| | void SortInstancesByIgnore( |
| | const std::array<double, 2>& area_range, |
| | const std::vector<InstanceAnnotation>& ground_truth_instances, |
| | std::vector<uint64_t>* ground_truth_sorted_indices, |
| | std::vector<bool>* ignores) { |
| | ignores->clear(); |
| | ignores->reserve(ground_truth_instances.size()); |
| | for (auto o : ground_truth_instances) { |
| | ignores->push_back( |
| | o.ignore || o.area < area_range[0] || o.area > area_range[1]); |
| | } |
| |
|
| | ground_truth_sorted_indices->resize(ground_truth_instances.size()); |
| | std::iota( |
| | ground_truth_sorted_indices->begin(), |
| | ground_truth_sorted_indices->end(), |
| | 0); |
| | std::stable_sort( |
| | ground_truth_sorted_indices->begin(), |
| | ground_truth_sorted_indices->end(), |
| | [&ignores](size_t j1, size_t j2) { |
| | return (int)(*ignores)[j1] < (int)(*ignores)[j2]; |
| | }); |
| | } |
| |
|
| | |
| | |
| | void MatchDetectionsToGroundTruth( |
| | const std::vector<InstanceAnnotation>& detection_instances, |
| | const std::vector<uint64_t>& detection_sorted_indices, |
| | const std::vector<InstanceAnnotation>& ground_truth_instances, |
| | const std::vector<uint64_t>& ground_truth_sorted_indices, |
| | const std::vector<bool>& ignores, |
| | const std::vector<std::vector<double>>& ious, |
| | const std::vector<double>& iou_thresholds, |
| | const std::array<double, 2>& area_range, |
| | ImageEvaluation* results) { |
| | |
| | const int num_iou_thresholds = iou_thresholds.size(); |
| | const int num_ground_truth = ground_truth_sorted_indices.size(); |
| | const int num_detections = detection_sorted_indices.size(); |
| | std::vector<uint64_t> ground_truth_matches( |
| | num_iou_thresholds * num_ground_truth, 0); |
| | std::vector<uint64_t>& detection_matches = results->detection_matches; |
| | std::vector<bool>& detection_ignores = results->detection_ignores; |
| | std::vector<bool>& ground_truth_ignores = results->ground_truth_ignores; |
| | detection_matches.resize(num_iou_thresholds * num_detections, 0); |
| | detection_ignores.resize(num_iou_thresholds * num_detections, false); |
| | ground_truth_ignores.resize(num_ground_truth); |
| | for (auto g = 0; g < num_ground_truth; ++g) { |
| | ground_truth_ignores[g] = ignores[ground_truth_sorted_indices[g]]; |
| | } |
| |
|
| | for (auto t = 0; t < num_iou_thresholds; ++t) { |
| | for (auto d = 0; d < num_detections; ++d) { |
| | |
| | double best_iou = std::min(iou_thresholds[t], 1 - 1e-10); |
| | int match = -1; |
| | for (auto g = 0; g < num_ground_truth; ++g) { |
| | |
| | |
| | if (ground_truth_matches[t * num_ground_truth + g] > 0 && |
| | !ground_truth_instances[ground_truth_sorted_indices[g]].is_crowd) { |
| | continue; |
| | } |
| |
|
| | |
| | |
| | |
| | if (match >= 0 && !ground_truth_ignores[match] && |
| | ground_truth_ignores[g]) { |
| | break; |
| | } |
| |
|
| | |
| | if (ious[d][ground_truth_sorted_indices[g]] >= best_iou) { |
| | best_iou = ious[d][ground_truth_sorted_indices[g]]; |
| | match = g; |
| | } |
| | } |
| | |
| | |
| | if (match >= 0) { |
| | detection_ignores[t * num_detections + d] = ground_truth_ignores[match]; |
| | detection_matches[t * num_detections + d] = |
| | ground_truth_instances[ground_truth_sorted_indices[match]].id; |
| | ground_truth_matches[t * num_ground_truth + match] = |
| | detection_instances[detection_sorted_indices[d]].id; |
| | } |
| |
|
| | |
| | const InstanceAnnotation& detection = |
| | detection_instances[detection_sorted_indices[d]]; |
| | detection_ignores[t * num_detections + d] = |
| | detection_ignores[t * num_detections + d] || |
| | (detection_matches[t * num_detections + d] == 0 && |
| | (detection.area < area_range[0] || detection.area > area_range[1])); |
| | } |
| | } |
| |
|
| | |
| | results->detection_scores.resize(detection_sorted_indices.size()); |
| | for (size_t d = 0; d < detection_sorted_indices.size(); ++d) { |
| | results->detection_scores[d] = |
| | detection_instances[detection_sorted_indices[d]].score; |
| | } |
| | } |
| |
|
| | std::vector<ImageEvaluation> EvaluateImages( |
| | const std::vector<std::array<double, 2>>& area_ranges, |
| | int max_detections, |
| | const std::vector<double>& iou_thresholds, |
| | const ImageCategoryInstances<std::vector<double>>& image_category_ious, |
| | const ImageCategoryInstances<InstanceAnnotation>& |
| | image_category_ground_truth_instances, |
| | const ImageCategoryInstances<InstanceAnnotation>& |
| | image_category_detection_instances) { |
| | const int num_area_ranges = area_ranges.size(); |
| | const int num_images = image_category_ground_truth_instances.size(); |
| | const int num_categories = |
| | image_category_ious.size() > 0 ? image_category_ious[0].size() : 0; |
| | std::vector<uint64_t> detection_sorted_indices; |
| | std::vector<uint64_t> ground_truth_sorted_indices; |
| | std::vector<bool> ignores; |
| | std::vector<ImageEvaluation> results_all( |
| | num_images * num_area_ranges * num_categories); |
| |
|
| | |
| | |
| | for (auto i = 0; i < num_images; ++i) { |
| | for (auto c = 0; c < num_categories; ++c) { |
| | const std::vector<InstanceAnnotation>& ground_truth_instances = |
| | image_category_ground_truth_instances[i][c]; |
| | const std::vector<InstanceAnnotation>& detection_instances = |
| | image_category_detection_instances[i][c]; |
| |
|
| | SortInstancesByDetectionScore( |
| | detection_instances, &detection_sorted_indices); |
| | if ((int)detection_sorted_indices.size() > max_detections) { |
| | detection_sorted_indices.resize(max_detections); |
| | } |
| |
|
| | for (size_t a = 0; a < area_ranges.size(); ++a) { |
| | SortInstancesByIgnore( |
| | area_ranges[a], |
| | ground_truth_instances, |
| | &ground_truth_sorted_indices, |
| | &ignores); |
| |
|
| | MatchDetectionsToGroundTruth( |
| | detection_instances, |
| | detection_sorted_indices, |
| | ground_truth_instances, |
| | ground_truth_sorted_indices, |
| | ignores, |
| | image_category_ious[i][c], |
| | iou_thresholds, |
| | area_ranges[a], |
| | &results_all |
| | [c * num_area_ranges * num_images + a * num_images + i]); |
| | } |
| | } |
| | } |
| |
|
| | return results_all; |
| | } |
| |
|
| | |
| | template <typename T> |
| | std::vector<T> list_to_vec(const py::list& l) { |
| | std::vector<T> v(py::len(l)); |
| | for (int i = 0; i < (int)py::len(l); ++i) { |
| | v[i] = l[i].cast<T>(); |
| | } |
| | return v; |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | int BuildSortedDetectionList( |
| | const std::vector<ImageEvaluation>& evaluations, |
| | const int64_t evaluation_index, |
| | const int64_t num_images, |
| | const int max_detections, |
| | std::vector<uint64_t>* evaluation_indices, |
| | std::vector<double>* detection_scores, |
| | std::vector<uint64_t>* detection_sorted_indices, |
| | std::vector<uint64_t>* image_detection_indices) { |
| | assert(evaluations.size() >= evaluation_index + num_images); |
| |
|
| | |
| | |
| | image_detection_indices->clear(); |
| | evaluation_indices->clear(); |
| | detection_scores->clear(); |
| | image_detection_indices->reserve(num_images * max_detections); |
| | evaluation_indices->reserve(num_images * max_detections); |
| | detection_scores->reserve(num_images * max_detections); |
| | int num_valid_ground_truth = 0; |
| | for (auto i = 0; i < num_images; ++i) { |
| | const ImageEvaluation& evaluation = evaluations[evaluation_index + i]; |
| |
|
| | for (int d = 0; |
| | d < (int)evaluation.detection_scores.size() && d < max_detections; |
| | ++d) { |
| | evaluation_indices->push_back(evaluation_index + i); |
| | image_detection_indices->push_back(d); |
| | detection_scores->push_back(evaluation.detection_scores[d]); |
| | } |
| | for (auto ground_truth_ignore : evaluation.ground_truth_ignores) { |
| | if (!ground_truth_ignore) { |
| | ++num_valid_ground_truth; |
| | } |
| | } |
| | } |
| |
|
| | |
| | |
| | detection_sorted_indices->resize(detection_scores->size()); |
| | std::iota( |
| | detection_sorted_indices->begin(), detection_sorted_indices->end(), 0); |
| | std::stable_sort( |
| | detection_sorted_indices->begin(), |
| | detection_sorted_indices->end(), |
| | [&detection_scores](size_t j1, size_t j2) { |
| | return (*detection_scores)[j1] > (*detection_scores)[j2]; |
| | }); |
| |
|
| | return num_valid_ground_truth; |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | void ComputePrecisionRecallCurve( |
| | const int64_t precisions_out_index, |
| | const int64_t precisions_out_stride, |
| | const int64_t recalls_out_index, |
| | const std::vector<double>& recall_thresholds, |
| | const int iou_threshold_index, |
| | const int num_iou_thresholds, |
| | const int num_valid_ground_truth, |
| | const std::vector<ImageEvaluation>& evaluations, |
| | const std::vector<uint64_t>& evaluation_indices, |
| | const std::vector<double>& detection_scores, |
| | const std::vector<uint64_t>& detection_sorted_indices, |
| | const std::vector<uint64_t>& image_detection_indices, |
| | std::vector<double>* precisions, |
| | std::vector<double>* recalls, |
| | std::vector<double>* precisions_out, |
| | std::vector<double>* scores_out, |
| | std::vector<double>* recalls_out) { |
| | assert(recalls_out->size() > recalls_out_index); |
| |
|
| | |
| | int64_t true_positives_sum = 0, false_positives_sum = 0; |
| | precisions->clear(); |
| | recalls->clear(); |
| | precisions->reserve(detection_sorted_indices.size()); |
| | recalls->reserve(detection_sorted_indices.size()); |
| | assert(!evaluations.empty() || detection_sorted_indices.empty()); |
| | for (auto detection_sorted_index : detection_sorted_indices) { |
| | const ImageEvaluation& evaluation = |
| | evaluations[evaluation_indices[detection_sorted_index]]; |
| | const auto num_detections = |
| | evaluation.detection_matches.size() / num_iou_thresholds; |
| | const auto detection_index = iou_threshold_index * num_detections + |
| | image_detection_indices[detection_sorted_index]; |
| | assert(evaluation.detection_matches.size() > detection_index); |
| | assert(evaluation.detection_ignores.size() > detection_index); |
| | const int64_t detection_match = |
| | evaluation.detection_matches[detection_index]; |
| | const bool detection_ignores = |
| | evaluation.detection_ignores[detection_index]; |
| | const auto true_positive = detection_match > 0 && !detection_ignores; |
| | const auto false_positive = detection_match == 0 && !detection_ignores; |
| | if (true_positive) { |
| | ++true_positives_sum; |
| | } |
| | if (false_positive) { |
| | ++false_positives_sum; |
| | } |
| |
|
| | const double recall = |
| | static_cast<double>(true_positives_sum) / num_valid_ground_truth; |
| | recalls->push_back(recall); |
| | const int64_t num_valid_detections = |
| | true_positives_sum + false_positives_sum; |
| | const double precision = num_valid_detections > 0 |
| | ? static_cast<double>(true_positives_sum) / num_valid_detections |
| | : 0.0; |
| | precisions->push_back(precision); |
| | } |
| |
|
| | (*recalls_out)[recalls_out_index] = !recalls->empty() ? recalls->back() : 0; |
| |
|
| | for (int64_t i = static_cast<int64_t>(precisions->size()) - 1; i > 0; --i) { |
| | if ((*precisions)[i] > (*precisions)[i - 1]) { |
| | (*precisions)[i - 1] = (*precisions)[i]; |
| | } |
| | } |
| |
|
| | |
| | for (size_t r = 0; r < recall_thresholds.size(); ++r) { |
| | |
| | std::vector<double>::iterator low = std::lower_bound( |
| | recalls->begin(), recalls->end(), recall_thresholds[r]); |
| | size_t precisions_index = low - recalls->begin(); |
| |
|
| | const auto results_ind = precisions_out_index + r * precisions_out_stride; |
| | assert(results_ind < precisions_out->size()); |
| | assert(results_ind < scores_out->size()); |
| | if (precisions_index < precisions->size()) { |
| | (*precisions_out)[results_ind] = (*precisions)[precisions_index]; |
| | (*scores_out)[results_ind] = |
| | detection_scores[detection_sorted_indices[precisions_index]]; |
| | } else { |
| | (*precisions_out)[results_ind] = 0; |
| | (*scores_out)[results_ind] = 0; |
| | } |
| | } |
| | } |
| | py::dict Accumulate( |
| | const py::object& params, |
| | const std::vector<ImageEvaluation>& evaluations) { |
| | const std::vector<double> recall_thresholds = |
| | list_to_vec<double>(params.attr("recThrs")); |
| | const std::vector<int> max_detections = |
| | list_to_vec<int>(params.attr("maxDets")); |
| | const int num_iou_thresholds = py::len(params.attr("iouThrs")); |
| | const int num_recall_thresholds = py::len(params.attr("recThrs")); |
| | const int num_categories = params.attr("useCats").cast<int>() == 1 |
| | ? py::len(params.attr("catIds")) |
| | : 1; |
| | const int num_area_ranges = py::len(params.attr("areaRng")); |
| | const int num_max_detections = py::len(params.attr("maxDets")); |
| | const int num_images = py::len(params.attr("imgIds")); |
| |
|
| | std::vector<double> precisions_out( |
| | num_iou_thresholds * num_recall_thresholds * num_categories * |
| | num_area_ranges * num_max_detections, |
| | -1); |
| | std::vector<double> recalls_out( |
| | num_iou_thresholds * num_categories * num_area_ranges * |
| | num_max_detections, |
| | -1); |
| | std::vector<double> scores_out( |
| | num_iou_thresholds * num_recall_thresholds * num_categories * |
| | num_area_ranges * num_max_detections, |
| | -1); |
| |
|
| | |
| | |
| | |
| | |
| | |
| | std::vector<uint64_t> evaluation_indices; |
| | std::vector<double> detection_scores; |
| | std::vector<uint64_t> detection_sorted_indices; |
| | |
| | std::vector<uint64_t> |
| | image_detection_indices; |
| | |
| | std::vector<double> precisions, recalls; |
| |
|
| | for (auto c = 0; c < num_categories; ++c) { |
| | for (auto a = 0; a < num_area_ranges; ++a) { |
| | for (auto m = 0; m < num_max_detections; ++m) { |
| | |
| | |
| | |
| | |
| | const int64_t evaluations_index = |
| | c * num_area_ranges * num_images + a * num_images; |
| | int num_valid_ground_truth = BuildSortedDetectionList( |
| | evaluations, |
| | evaluations_index, |
| | num_images, |
| | max_detections[m], |
| | &evaluation_indices, |
| | &detection_scores, |
| | &detection_sorted_indices, |
| | &image_detection_indices); |
| |
|
| | if (num_valid_ground_truth == 0) { |
| | continue; |
| | } |
| |
|
| | for (auto t = 0; t < num_iou_thresholds; ++t) { |
| | |
| | |
| | |
| | const int64_t recalls_out_index = |
| | t * num_categories * num_area_ranges * num_max_detections + |
| | c * num_area_ranges * num_max_detections + |
| | a * num_max_detections + m; |
| |
|
| | |
| | |
| | |
| | const int64_t precisions_out_stride = |
| | num_categories * num_area_ranges * num_max_detections; |
| | const int64_t precisions_out_index = t * num_recall_thresholds * |
| | num_categories * num_area_ranges * num_max_detections + |
| | c * num_area_ranges * num_max_detections + |
| | a * num_max_detections + m; |
| |
|
| | ComputePrecisionRecallCurve( |
| | precisions_out_index, |
| | precisions_out_stride, |
| | recalls_out_index, |
| | recall_thresholds, |
| | t, |
| | num_iou_thresholds, |
| | num_valid_ground_truth, |
| | evaluations, |
| | evaluation_indices, |
| | detection_scores, |
| | detection_sorted_indices, |
| | image_detection_indices, |
| | &precisions, |
| | &recalls, |
| | &precisions_out, |
| | &scores_out, |
| | &recalls_out); |
| | } |
| | } |
| | } |
| | } |
| |
|
| | time_t rawtime; |
| | struct tm local_time; |
| | std::array<char, 200> buffer; |
| | time(&rawtime); |
| | #ifdef _WIN32 |
| | localtime_s(&local_time, &rawtime); |
| | #else |
| | localtime_r(&rawtime, &local_time); |
| | #endif |
| | strftime( |
| | buffer.data(), 200, "%Y-%m-%d %H:%num_max_detections:%S", &local_time); |
| | return py::dict( |
| | "params"_a = params, |
| | "counts"_a = std::vector<int64_t>( |
| | {num_iou_thresholds, |
| | num_recall_thresholds, |
| | num_categories, |
| | num_area_ranges, |
| | num_max_detections}), |
| | "date"_a = buffer, |
| | "precision"_a = precisions_out, |
| | "recall"_a = recalls_out, |
| | "scores"_a = scores_out); |
| | } |
| |
|
| | } |
| |
|
| | } |
| |
|