Commit
·
7966679
1
Parent(s):
fdcd171
let's try to repair inconsistencies
Browse files- generic_ner.py +5 -0
generic_ner.py
CHANGED
|
@@ -73,6 +73,11 @@ def get_entities(tokens, tags, confidences, text):
|
|
| 73 |
tags = [tag.replace("S-", "B-").replace("E-", "I-") for tag in tags]
|
| 74 |
pos_tags = [pos for token, pos in pos_tag(tokens)]
|
| 75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
conlltags = [(token, pos, tg) for token, pos, tg in zip(tokens, pos_tags, tags)]
|
| 77 |
ne_tree = conlltags2tree(conlltags)
|
| 78 |
|
|
|
|
| 73 |
tags = [tag.replace("S-", "B-").replace("E-", "I-") for tag in tags]
|
| 74 |
pos_tags = [pos for token, pos in pos_tag(tokens)]
|
| 75 |
|
| 76 |
+
for i in range(1, len(tags)):
|
| 77 |
+
# If a 'B-' tag directly follows an 'I-' tag (no 'O' in between), change it to 'I-'
|
| 78 |
+
if tags[i].startswith("B-") and tags[i - 1].startswith("I-"):
|
| 79 |
+
tags[i] = "I-" + tags[i][2:] # Change 'B-' to 'I-' for the same entity type
|
| 80 |
+
|
| 81 |
conlltags = [(token, pos, tg) for token, pos, tg in zip(tokens, pos_tags, tags)]
|
| 82 |
ne_tree = conlltags2tree(conlltags)
|
| 83 |
|