Commit
·
797e71f
1
Parent(s):
2d1ae5b
add comp back
Browse files- generic_ner.py +9 -4
generic_ner.py
CHANGED
|
@@ -500,20 +500,25 @@ def refine_entities_with_coarse(all_entities, coarse_entities):
|
|
| 500 |
|
| 501 |
# Iterate through all entities and compare with the coarse entities
|
| 502 |
for entity in all_entities:
|
| 503 |
-
key = (
|
|
|
|
|
|
|
|
|
|
| 504 |
|
| 505 |
if key in coarse_lookup:
|
| 506 |
coarse_entity = coarse_lookup[key]
|
| 507 |
# If a match is found, update the confidence_ner and type in the entity
|
| 508 |
if entity["confidence_ner"] < coarse_entity["confidence_ner"]:
|
| 509 |
entity["confidence_ner"] = coarse_entity["confidence_ner"]
|
| 510 |
-
entity["type"] = coarse_entity[
|
| 511 |
-
|
|
|
|
|
|
|
|
|
|
| 512 |
# No need to append to refined_entities, we're modifying in place
|
| 513 |
return all_entities
|
| 514 |
|
| 515 |
|
| 516 |
-
|
| 517 |
def remove_trailing_stopwords(entities):
|
| 518 |
"""
|
| 519 |
This function removes stopwords and punctuation from both the beginning and end of each entity's text
|
|
|
|
| 500 |
|
| 501 |
# Iterate through all entities and compare with the coarse entities
|
| 502 |
for entity in all_entities:
|
| 503 |
+
key = (
|
| 504 |
+
entity["surface"],
|
| 505 |
+
entity["type"].split(".")[0],
|
| 506 |
+
) # Use the coarse type for comparison
|
| 507 |
|
| 508 |
if key in coarse_lookup:
|
| 509 |
coarse_entity = coarse_lookup[key]
|
| 510 |
# If a match is found, update the confidence_ner and type in the entity
|
| 511 |
if entity["confidence_ner"] < coarse_entity["confidence_ner"]:
|
| 512 |
entity["confidence_ner"] = coarse_entity["confidence_ner"]
|
| 513 |
+
entity["type"] = coarse_entity[
|
| 514 |
+
"type"
|
| 515 |
+
] # Update the type if the confidence is higher
|
| 516 |
+
else:
|
| 517 |
+
entity["type"] = entity["type"].split('.')[0]
|
| 518 |
# No need to append to refined_entities, we're modifying in place
|
| 519 |
return all_entities
|
| 520 |
|
| 521 |
|
|
|
|
| 522 |
def remove_trailing_stopwords(entities):
|
| 523 |
"""
|
| 524 |
This function removes stopwords and punctuation from both the beginning and end of each entity's text
|