aaljabari committed on
Commit
3407a7c
·
verified ·
1 Parent(s): f26a4b0

Create metrics.py

Browse files
Files changed (1) hide show
  1. Nested/utils/metrics.py +69 -0
Nested/utils/metrics.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from seqeval.metrics import (
2
+ classification_report,
3
+ precision_score,
4
+ recall_score,
5
+ f1_score,
6
+ accuracy_score,
7
+ )
8
+ from seqeval.scheme import IOB2
9
+ from types import SimpleNamespace
10
+ import logging
11
+ import re
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
def compute_nested_metrics(segments, vocabs):
    """
    Compute metrics for nested NER

    The dataset is conceptually duplicated once per vocab in ``vocabs``. Copy
    ``i`` pairs, for every token, the first gold tag that belongs to vocab ``i``
    (or "O" when none does) with the prediction ``token.pred_tag[i]["tag"]``.
    All copies are then scored together as one flat IOB2 sequence-labeling task.

    :param segments: List[List[Nested.data.dataset.Token]] - list of segments
    :param vocabs: sequence of vocab objects exposing ``get_itos()``; the vocab at
                   position i is paired with ``token.pred_tag[i]``
    :return: metrics - SimpleNamespace - F1/micro/macro/weights, recall, precision, accuracy
    """
    y, y_hat = [], []

    # Example: the first copy creates pairs of ground truth and predicted labels
    # for one entity type (e.g. GPE), the next copy for another (e.g. LOC), etc.
    for i, vocab in enumerate(vocabs):
        # Only dashed tags ("B-XXX" / "I-XXX") are kept; entries without a dash are skipped.
        # Exact set membership is used instead of a regex prefix match so that a
        # tag such as "B-ORG" can never claim a longer gold tag like "B-ORG_FAC",
        # and a vocab without dashed tags yields "O" rather than matching everything.
        vocab_tags = {tag for tag in vocab.get_itos() if "-" in tag}

        for segment in segments:
            y.append(
                [
                    next((tag for tag in token.gold_tag if tag in vocab_tags), "O")
                    for token in segment
                ]
            )
            y_hat.append([token.pred_tag[i]["tag"] for token in segment])

    # Use the module-level logger so per-module logging configuration applies.
    logger.info("\n%s", classification_report(y, y_hat, scheme=IOB2, digits=4))

    return SimpleNamespace(
        micro_f1=f1_score(y, y_hat, average="micro", scheme=IOB2),
        macro_f1=f1_score(y, y_hat, average="macro", scheme=IOB2),
        weights_f1=f1_score(y, y_hat, average="weighted", scheme=IOB2),
        precision=precision_score(y, y_hat, scheme=IOB2),
        recall=recall_score(y, y_hat, scheme=IOB2),
        accuracy=accuracy_score(y, y_hat),
    )
47
+
48
+
49
def compute_single_label_metrics(segments):
    """
    Compute metrics for flat NER

    Each token contributes its first gold tag (``token.gold_tag[0]``) and its
    first predicted tag (``token.pred_tag[0]["tag"]``); one label sequence is
    built per segment and scored with the IOB2 scheme.

    :param segments: List[List[Nested.data.dataset.Token]] - list of segments
    :return: metrics - SimpleNamespace - F1/micro/macro/weights, recall, precision, accuracy
    """
    y, y_hat = [], []
    for segment in segments:
        y.append([token.gold_tag[0] for token in segment])
        y_hat.append([token.pred_tag[0]["tag"] for token in segment])

    # Use the module-level logger so per-module logging configuration applies.
    logger.info("\n%s", classification_report(y, y_hat, scheme=IOB2, digits=4))

    return SimpleNamespace(
        micro_f1=f1_score(y, y_hat, average="micro", scheme=IOB2),
        macro_f1=f1_score(y, y_hat, average="macro", scheme=IOB2),
        weights_f1=f1_score(y, y_hat, average="weighted", scheme=IOB2),
        precision=precision_score(y, y_hat, scheme=IOB2),
        recall=recall_score(y, y_hat, scheme=IOB2),
        accuracy=accuracy_score(y, y_hat),
    )