Spaces:
Running
Running
Create metrics.py
Browse files- Nested/utils/metrics.py +69 -0
Nested/utils/metrics.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from seqeval.metrics import (
|
| 2 |
+
classification_report,
|
| 3 |
+
precision_score,
|
| 4 |
+
recall_score,
|
| 5 |
+
f1_score,
|
| 6 |
+
accuracy_score,
|
| 7 |
+
)
|
| 8 |
+
from seqeval.scheme import IOB2
|
| 9 |
+
from types import SimpleNamespace
|
| 10 |
+
import logging
|
| 11 |
+
import re
|
| 12 |
+
|
| 13 |
+
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def compute_nested_metrics(segments, vocabs):
    """
    Compute metrics for nested NER

    For every vocab in ``vocabs`` one gold and one predicted label sequence is
    built per segment, so the scores are computed over
    ``len(vocabs) * len(segments)`` sequences in total.

    :param segments: List[List[Nested.data.dataset.Token]] - list of segments
    :param vocabs: sequence of vocab objects exposing ``get_itos()``; the vocab at
                   index ``i`` is paired with ``token.pred_tag[i]``
    :return: metrics - SimpleNamespace - F1/micro/macro/weights, recall, precision, accuracy
    """
    y, y_hat = [], []

    # We duplicate the dataset N times, where N is the number of entity types
    # For each copy, we create y and y_hat
    # Example: first copy, will create pairs of ground truth and predicted labels for entity type GPE
    # another copy will create pairs for LOC, etc.
    for i, vocab in enumerate(vocabs):
        # Only tags containing a dash are kept for this vocab; anything else is skipped.
        # Exact set membership is used instead of an unescaped regex prefix match, so an
        # empty tag list selects nothing and a tag never matches a longer tag sharing
        # the same prefix.
        type_tags = {tag for tag in vocab.get_itos() if "-" in tag}

        # First gold tag of the token that belongs to this vocab, or "O" if there is none
        y += [
            [next((tag for tag in token.gold_tag if tag in type_tags), "O") for token in segment]
            for segment in segments
        ]
        y_hat += [[token.pred_tag[i]["tag"] for token in segment] for segment in segments]

    # NOTE(review): seqeval appears to honour `scheme` only together with mode="strict" -
    # confirm whether strict evaluation was intended before changing these calls.
    report = classification_report(y, y_hat, scheme=IOB2, digits=4)
    logger.info("\n%s", report)

    metrics = {
        "micro_f1": f1_score(y, y_hat, average="micro", scheme=IOB2),
        "macro_f1": f1_score(y, y_hat, average="macro", scheme=IOB2),
        "weights_f1": f1_score(y, y_hat, average="weighted", scheme=IOB2),
        "precision": precision_score(y, y_hat, scheme=IOB2),
        "recall": recall_score(y, y_hat, scheme=IOB2),
        "accuracy": accuracy_score(y, y_hat),
    }

    return SimpleNamespace(**metrics)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def compute_single_label_metrics(segments):
    """
    Compute metrics for flat NER

    Only the first gold tag (``token.gold_tag[0]``) and the first prediction
    (``token.pred_tag[0]["tag"]``) of every token are compared.

    :param segments: List[List[Nested.data.dataset.Token]] - list of segments
    :return: metrics - SimpleNamespace - F1/micro/macro/weights, recall, precision, accuracy
    """
    y = [[token.gold_tag[0] for token in segment] for segment in segments]
    y_hat = [[token.pred_tag[0]["tag"] for token in segment] for segment in segments]

    # Log through the module-level logger rather than the root logger.
    # NOTE(review): seqeval appears to honour `scheme` only together with mode="strict" -
    # confirm whether strict evaluation was intended before changing these calls.
    report = classification_report(y, y_hat, scheme=IOB2, digits=4)
    logger.info("\n%s", report)

    metrics = {
        "micro_f1": f1_score(y, y_hat, average="micro", scheme=IOB2),
        "macro_f1": f1_score(y, y_hat, average="macro", scheme=IOB2),
        "weights_f1": f1_score(y, y_hat, average="weighted", scheme=IOB2),
        "precision": precision_score(y, y_hat, scheme=IOB2),
        "recall": recall_score(y, y_hat, scheme=IOB2),
        "accuracy": accuracy_score(y, y_hat),
    }

    return SimpleNamespace(**metrics)
|