Commit
·
4b21d68
1
Parent(s):
45597c2
debug
Browse files
- generic_ner.py +18 -13
generic_ner.py
CHANGED
|
@@ -274,18 +274,15 @@ class MultitaskTokenClassificationPipeline(Pipeline):
|
|
| 274 |
"""Check if entity1 is fully within the bounds of entity2."""
|
| 275 |
return entity1["start"] >= entity2["start"] and entity1["end"] <= entity2["end"]
|
| 276 |
|
| 277 |
-
def postprocess_entities(self,
|
| 278 |
# Collect all entities in one list for processing
|
| 279 |
-
|
| 280 |
-
for
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
# "--",
|
| 287 |
-
# entity["word"],
|
| 288 |
-
# )
|
| 289 |
|
| 290 |
# Sort entities by start position, then by end position (to handle nested structures)
|
| 291 |
all_entities.sort(key=lambda x: (x["start"], -x["end"]))
|
|
@@ -360,10 +357,18 @@ class MultitaskTokenClassificationPipeline(Pipeline):
|
|
| 360 |
|
| 361 |
print("Before:")
|
| 362 |
pprint(entities)
|
| 363 |
-
postprocessed_entities = self.postprocess_entities(entities, text_sentence)
|
| 364 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
# Attach "comp.function" entities to the closest non-"comp.function" entity
|
| 366 |
postprocessed_entities = attach_comp_to_closest(postprocessed_entities)
|
| 367 |
-
print("After:")
|
| 368 |
pprint(postprocessed_entities)
|
|
|
|
| 369 |
return postprocessed_entities
|
|
|
|
| 274 |
"""Check if entity1 is fully within the bounds of entity2."""
|
| 275 |
return entity1["start"] >= entity2["start"] and entity1["end"] <= entity2["end"]
|
| 276 |
|
| 277 |
+
def postprocess_entities(self, all_entities, text_sentence):
|
| 278 |
# Collect all entities in one list for processing
|
| 279 |
+
|
| 280 |
+
# for entity in ner_results[key]:
|
| 281 |
+
# print(
|
| 282 |
+
# text_sentence[entity["start"] : entity["end"]],
|
| 283 |
+
# "--",
|
| 284 |
+
# entity["word"],
|
| 285 |
+
# )
|
|
|
|
|
|
|
|
|
|
| 286 |
|
| 287 |
# Sort entities by start position, then by end position (to handle nested structures)
|
| 288 |
all_entities.sort(key=lambda x: (x["start"], -x["end"]))
|
|
|
|
| 357 |
|
| 358 |
print("Before:")
|
| 359 |
pprint(entities)
|
|
|
|
| 360 |
|
| 361 |
+
all_entities = []
|
| 362 |
+
for key in entities:
|
| 363 |
+
if key not in ["NE-COARSE-LIT"]:
|
| 364 |
+
all_entities.extend(entities[key])
|
| 365 |
+
|
| 366 |
+
postprocessed_entities = self.postprocess_entities(all_entities, text_sentence)
|
| 367 |
+
print("After 1:")
|
| 368 |
+
pprint(postprocessed_entities)
|
| 369 |
# Attach "comp.function" entities to the closest non-"comp.function" entity
|
| 370 |
postprocessed_entities = attach_comp_to_closest(postprocessed_entities)
|
| 371 |
+
print("After 2:")
|
| 372 |
pprint(postprocessed_entities)
|
| 373 |
+
print("\n")
|
| 374 |
return postprocessed_entities
|