Commit
·
2afe88b
1
Parent(s):
bc35ffb
degbu
Browse files- generic_ner.py +21 -1
generic_ner.py
CHANGED
|
@@ -256,6 +256,22 @@ def postprocess_entities(entities):
|
|
| 256 |
return filtered_entities
|
| 257 |
|
| 258 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
class MultitaskTokenClassificationPipeline(Pipeline):
|
| 260 |
|
| 261 |
def _sanitize_parameters(self, **kwargs):
|
|
@@ -381,10 +397,14 @@ class MultitaskTokenClassificationPipeline(Pipeline):
|
|
| 381 |
)
|
| 382 |
|
| 383 |
print("After 1:")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 384 |
pprint(all_entities)
|
| 385 |
# Attach "comp.function" entities to the closest non-"comp.function" entity
|
| 386 |
all_entities = attach_comp_to_closest(all_entities)
|
| 387 |
-
print("After 2:")
|
| 388 |
pprint(all_entities)
|
| 389 |
print("\n")
|
| 390 |
return all_entities
|
|
|
|
| 256 |
return filtered_entities
|
| 257 |
|
| 258 |
|
| 259 |
+
def remove_included_entities(entities):
    """Drop entities whose text is contained in a longer same-label entity.

    An entity is discarded when another entity in ``entities`` carries the
    same ``"entity"`` label and a *different* ``"text"`` value of which this
    entity's text is a substring (e.g. ``"John"`` is dropped in favour of
    ``"John Smith"``).

    Entities that share exactly the same text and label are never treated as
    included in one another. Otherwise two mentions that differ only in
    other fields (offsets, score, ...) would each be a substring of the other
    and both would be removed, losing the entity altogether.

    Args:
        entities: Iterable-safe sequence of dicts, each providing at least
            the ``"entity"`` (label) and ``"text"`` keys.

    Returns:
        A new list with the surviving entities in their original order. The
        input sequence and its dicts are not modified.
    """
    final_entities = []
    for entity in entities:
        label = entity["entity"]
        text = entity["text"]
        # Only a different text can subsume this one; requiring the texts to
        # differ also rules out comparing an entity against itself.
        is_included = any(
            other["entity"] == label
            and other["text"] != text
            and text in other["text"]
            for other in entities
        )
        if not is_included:
            final_entities.append(entity)
    return final_entities
|
| 273 |
+
|
| 274 |
+
|
| 275 |
class MultitaskTokenClassificationPipeline(Pipeline):
|
| 276 |
|
| 277 |
def _sanitize_parameters(self, **kwargs):
|
|
|
|
| 397 |
)
|
| 398 |
|
| 399 |
print("After 1:")
|
| 400 |
+
all_entities = remove_included_entities(all_entities)
|
| 401 |
+
pprint(all_entities)
|
| 402 |
+
|
| 403 |
+
print("After 2:")
|
| 404 |
pprint(all_entities)
|
| 405 |
# Attach "comp.function" entities to the closest non-"comp.function" entity
|
| 406 |
all_entities = attach_comp_to_closest(all_entities)
|
| 407 |
+
print("After 3:")
|
| 408 |
pprint(all_entities)
|
| 409 |
print("\n")
|
| 410 |
return all_entities
|