Commit ·
a8fedb5
1
Parent(s): c08a424
debug
Browse files- generic_ner.py +8 -6
generic_ner.py
CHANGED
|
@@ -245,7 +245,10 @@ def postprocess_entities(entities):
|
|
| 245 |
num_dots = entity["entity"].count(".")
|
| 246 |
|
| 247 |
# If the entity text is new, or this entity has more dots, update the map
|
| 248 |
-
if entity_text not in entity_map or entity_map[entity_text]["entity"].count(".") < num_dots:
|
| 249 |
entity_map[entity_text] = entity
|
| 250 |
|
| 251 |
# Collect the filtered entities from the map
|
|
@@ -257,7 +260,6 @@ def postprocess_entities(entities):
|
|
| 257 |
return filtered_entities
|
| 258 |
|
| 259 |
|
| 260 |
-
|
| 261 |
def remove_included_entities(entities):
|
| 262 |
# Loop through entities and remove those whose text is included in another with the same label
|
| 263 |
final_entities = []
|
|
@@ -386,11 +388,11 @@ class MultitaskTokenClassificationPipeline(Pipeline):
|
|
| 386 |
# pprint(all_entities)
|
| 387 |
|
| 388 |
all_entities = remove_trailing_stopwords(all_entities)
|
| 389 |
-
|
| 390 |
-
|
| 391 |
# Attach "comp.function" entities to the closest non-"comp.function" entity
|
| 392 |
all_entities = attach_comp_to_closest(all_entities)
|
| 393 |
-
|
| 394 |
-
|
| 395 |
# print("\n")
|
| 396 |
return all_entities
|
|
|
|
| 245 |
num_dots = entity["entity"].count(".")
|
| 246 |
|
| 247 |
# If the entity text is new, or this entity has more dots, update the map
|
| 248 |
+
if (
|
| 249 |
+
entity_text not in entity_map
|
| 250 |
+
or entity_map[entity_text]["entity"].count(".") < num_dots
|
| 251 |
+
):
|
| 252 |
entity_map[entity_text] = entity
|
| 253 |
|
| 254 |
# Collect the filtered entities from the map
|
|
|
|
| 260 |
return filtered_entities
|
| 261 |
|
| 262 |
|
|
|
|
| 263 |
def remove_included_entities(entities):
|
| 264 |
# Loop through entities and remove those whose text is included in another with the same label
|
| 265 |
final_entities = []
|
|
|
|
| 388 |
# pprint(all_entities)
|
| 389 |
|
| 390 |
all_entities = remove_trailing_stopwords(all_entities)
|
| 391 |
+
print("After remove_trailing_stopwords:")
|
| 392 |
+
pprint(all_entities)
|
| 393 |
# Attach "comp.function" entities to the closest non-"comp.function" entity
|
| 394 |
all_entities = attach_comp_to_closest(all_entities)
|
| 395 |
+
print("After attach_comp_to_closest:")
|
| 396 |
+
pprint(all_entities)
|
| 397 |
# print("\n")
|
| 398 |
return all_entities
|