emanuelaboros committed on
Commit
7966679
·
1 Parent(s): fdcd171

Let's try to repair inconsistencies

Browse files
Files changed (1) hide show
  1. generic_ner.py +5 -0
generic_ner.py CHANGED
@@ -73,6 +73,11 @@ def get_entities(tokens, tags, confidences, text):
73
  tags = [tag.replace("S-", "B-").replace("E-", "I-") for tag in tags]
74
  pos_tags = [pos for token, pos in pos_tag(tokens)]
75
 
 
 
 
 
 
76
  conlltags = [(token, pos, tg) for token, pos, tg in zip(tokens, pos_tags, tags)]
77
  ne_tree = conlltags2tree(conlltags)
78
 
 
73
  tags = [tag.replace("S-", "B-").replace("E-", "I-") for tag in tags]
74
  pos_tags = [pos for token, pos in pos_tag(tokens)]
75
 
76
+ for i in range(1, len(tags)):
77
+ # If a 'B-' tag immediately follows an 'I-' tag (no 'O' in between), rewrite it as 'I-' so it continues the preceding entity
78
+ if tags[i].startswith("B-") and tags[i - 1].startswith("I-"):
79
+ tags[i] = "I-" + tags[i][2:] # Change 'B-' to 'I-' for the same entity type
80
+
81
  conlltags = [(token, pos, tg) for token, pos, tg in zip(tokens, pos_tags, tags)]
82
  ne_tree = conlltags2tree(conlltags)
83