Commit · 2d1ae5b
1 Parent(s): ba34bb6
add comp back
Browse files
- generic_ner.py +10 -21
generic_ner.py
CHANGED
|
@@ -490,39 +490,28 @@ def refine_entities_with_coarse(all_entities, coarse_entities):
|
|
| 490 |
"""
|
| 491 |
Looks through all entities and refines them based on the coarse entities.
|
| 492 |
If a surface match is found in the coarse entities and the types match,
|
| 493 |
-
the entity
|
| 494 |
"""
|
| 495 |
-
refined_entities = []
|
| 496 |
-
|
| 497 |
# Create a dictionary for coarse entities based on surface and type for quick lookup
|
| 498 |
coarse_lookup = {}
|
| 499 |
for coarse_entity in coarse_entities:
|
| 500 |
-
key = (coarse_entity["surface"], coarse_entity["type"])
|
| 501 |
coarse_lookup[key] = coarse_entity
|
| 502 |
|
| 503 |
# Iterate through all entities and compare with the coarse entities
|
| 504 |
for entity in all_entities:
|
| 505 |
-
key = (
|
| 506 |
-
entity["surface"],
|
| 507 |
-
entity["type"].split(".")[0],
|
| 508 |
-
) # Use the coarse type for comparison
|
| 509 |
|
| 510 |
if key in coarse_lookup:
|
| 511 |
-
# If the types match, compare confidence_ner and keep the one with the higher confidence
|
| 512 |
coarse_entity = coarse_lookup[key]
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
) # Keep the coarse entity with higher confidence
|
| 521 |
-
else:
|
| 522 |
-
# If no match in coarse, just add the entity to refined entities
|
| 523 |
-
refined_entities.append(entity)
|
| 524 |
|
| 525 |
-
return refined_entities
|
| 526 |
|
| 527 |
|
| 528 |
def remove_trailing_stopwords(entities):
|
|
|
|
| 490 |
"""
|
| 491 |
Looks through all entities and refines them based on the coarse entities.
|
| 492 |
If a surface match is found in the coarse entities and the types match,
|
| 493 |
+
the entity's confidence_ner and type are updated based on the coarse entity.
|
| 494 |
"""
|
|
|
|
|
|
|
| 495 |
# Create a dictionary for coarse entities based on surface and type for quick lookup
|
| 496 |
coarse_lookup = {}
|
| 497 |
for coarse_entity in coarse_entities:
|
| 498 |
+
key = (coarse_entity["surface"], coarse_entity["type"].split(".")[0])
|
| 499 |
coarse_lookup[key] = coarse_entity
|
| 500 |
|
| 501 |
# Iterate through all entities and compare with the coarse entities
|
| 502 |
for entity in all_entities:
|
| 503 |
+
key = (entity["surface"], entity["type"].split(".")[0]) # Use the coarse type for comparison
|
|
|
|
|
|
|
|
|
|
| 504 |
|
| 505 |
if key in coarse_lookup:
|
|
|
|
| 506 |
coarse_entity = coarse_lookup[key]
|
| 507 |
+
# If a match is found, update the confidence_ner and type in the entity
|
| 508 |
+
if entity["confidence_ner"] < coarse_entity["confidence_ner"]:
|
| 509 |
+
entity["confidence_ner"] = coarse_entity["confidence_ner"]
|
| 510 |
+
entity["type"] = coarse_entity["type"] # Update the type if the confidence is higher
|
| 511 |
+
|
| 512 |
+
# No need to append to refined_entities, we're modifying in place
|
| 513 |
+
return all_entities
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
|
|
|
|
| 515 |
|
| 516 |
|
| 517 |
def remove_trailing_stopwords(entities):
|