# pylint: disable=invalid-name,too-many-locals
import copy
import typing
import warnings

import editdistance
import numpy as np
import pyclipper
import cv2


# Adapted from https://github.com/andreasveit/coco-text/blob/master/coco_evaluation.py
def iou_score(box1, box2):
    """Returns the Intersection-over-Union score, defined as the area of
    the intersection divided by the intersection over the union of
    the two bounding boxes. This measure is symmetric.

    Args:
        box1: The coordinates for box 1 as a list of (x, y) coordinates
        box2: The coordinates for box 2 in same format as box1.
    """
    if len(box1) == 2:
        x1, y1 = box1[0]
        x2, y2 = box1[1]
        box1 = np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2]])
    if len(box2) == 2:
        x1, y1 = box2[0]
        x2, y2 = box2[1]
        box2 = np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2]])
    if any(
        cv2.contourArea(np.array(box, dtype="int32")[:, np.newaxis, :]) == 0
        for box in [box1, box2]
    ):
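        # Degenerate (zero-area) boxes make the IoU ill-defined, so treat
        # them as having no overlap.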
        warnings.warn("A box with zero area was detected.")
        return 0
    pc = pyclipper.Pyclipper()
    pc.AddPath(np.array(box1, dtype="int32"), pyclipper.PT_SUBJECT, closed=True)
    pc.AddPath(np.array(box2, dtype="int32"), pyclipper.PT_CLIP, closed=True)
    intersection_solutions = pc.Execute(
        pyclipper.CT_INTERSECTION, pyclipper.PFT_EVENODD, pyclipper.PFT_EVENODD
    )
    union_solutions = pc.Execute(
        pyclipper.CT_UNION, pyclipper.PFT_EVENODD, pyclipper.PFT_EVENODD
    )
    union = sum(
        cv2.contourArea(np.array(points, dtype="int32")[:, np.newaxis, :])
        for points in union_solutions
    )
    intersection = sum(
        cv2.contourArea(np.array(points, dtype="int32")[:, np.newaxis, :])
        for points in intersection_solutions
    )
    return intersection / union
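
# A worked sketch of ``iou_score`` on hypothetical boxes (values chosen for
# illustration only): a 10x10 box at the origin and a 10x10 box shifted right
# by 5 share a 5x10 region, so the IoU is 50 / (100 + 100 - 50) = 1/3.
#
#   iou_score([(0, 0), (10, 10)], [(5, 0), (15, 10)])  # ~0.333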


def score(true, pred, iou_threshold=0.5, similarity_threshold=0.5, translator=None):
    """
    Args:
        true: The ground truth boxes provided as a dictionary of {image_id: annotations}
            mappings. `annotations` should be lists of dicts with a `text` and `vertices` key.
            `vertices` should be a list of (x, y) coordinates. Optionally, an "ignore" key can be
            added to indicate that detecting an annotation should neither count as a false positive
            nor should failure to detect it count as a false negative.
        pred: The predicted boxes in the same format as `true`.
        iou_threshold: The minimum IoU to qualify a box as a match.
        similarity_threshold: The minimum text similarity required to qualify
            a text string as a match.
        translator: A translator acceptable by `str.translate`. Used to
            modify ground truth / predicted strings. For example,
            `str.maketrans(string.ascii_uppercase, string.ascii_lowercase,
            string.punctuation)` would yield a translator that changes all
            strings to lowercase and removes punctuation.

    Returns:
        A results dictionary reporting false positives, false negatives, true positives
        and near matches (IoU >= iou_threshold but similarity < similarity_threshold) along
        with the computed precision and recall.
    """
    true_ids = sorted(true)
    pred_ids = sorted(pred)
    assert true_ids == pred_ids, "true and pred dictionaries must have the same keys"
    results: typing.Dict[str, typing.List[dict]] = {
        "true_positives": [],
        "false_positives": [],
        "near_true_positives": [],
        "false_negatives": [],
    }
    for image_id in true_ids:
        true_anns = true[image_id]
        pred_anns = copy.deepcopy(pred[image_id])
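        # Predictions that match at least one ground-truth box (by IoU) are
        # recorded here; anything left unmatched becomes a false positive.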
        pred_matched = set()
        for true_index, true_ann in enumerate(true_anns):
            match = None
            for pred_index, pred_ann in enumerate(pred_anns):
                iou = iou_score(true_ann["vertices"], pred_ann["vertices"])
                if iou >= iou_threshold:
                    match = {
                        "true_idx": true_index,
                        "pred_idx": pred_index,
                        "image_id": image_id,
                    }
                    pred_matched.add(pred_index)
                    true_text = true_ann["text"]
                    pred_text = pred_ann["text"]
                    if true_ann.get("ignore", False):
                        # We recorded that this prediction matched something,
                        # so it won't be a false positive. But we're also ignoring
                        # this ground truth label so we won't count it as a true
                        # positive or a near true positive.
                        continue
                    if translator is not None:
                        true_text = true_text.translate(translator)
                        pred_text = pred_text.translate(translator)
                    edit_distance_norm = max(len(true_text), len(pred_text))
                    if edit_distance_norm == 0:
                        similarity = 1
                    else:
                        similarity = 1 - (
                            editdistance.eval(true_text, pred_text)
                            / edit_distance_norm
                        )
                    if similarity >= similarity_threshold:
                        results["true_positives"].append(match)
                    else:
                        results["near_true_positives"].append(match)
            if match is None and not true_ann.get("ignore", False):
                results["false_negatives"].append(
                    {"image_id": image_id, "true_idx": true_index}
                )
        results["false_positives"].extend(
            {"pred_index": pred_index, "image_id": image_id}
            for pred_index, _ in enumerate(pred_anns)
            if pred_index not in pred_matched
        )
    fns = len(results["false_negatives"])
    fps = len(results["false_positives"])
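    # Count each ground-truth box at most once as a true positive, even if
    # several predictions matched it.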
    tps = len(
        set(
            (true_positive["image_id"], true_positive["true_idx"])
            for true_positive in results["true_positives"]
        )
    )
    precision = tps / (tps + fps) if tps + fps > 0 else 0.0
    recall = tps / (tps + fns) if tps + fns > 0 else 0.0
    return results, (precision, recall)
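

if __name__ == "__main__":
    # A minimal smoke test on hypothetical data (not part of the original
    # module): one image whose single ground-truth word is recovered almost
    # exactly, so precision and recall should both come out as 1.0.
    truth = {
        "img-0": [{"text": "hello", "vertices": [(0, 0), (50, 0), (50, 20), (0, 20)]}]
    }
    prediction = {
        "img-0": [{"text": "hello", "vertices": [(1, 1), (49, 1), (49, 19), (1, 19)]}]
    }
    results, (precision, recall) = score(truth, prediction)
    print(f"true positives: {len(results['true_positives'])}")
    print(f"precision: {precision:.2f}, recall: {recall:.2f}")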