Commit ·
9e65353
1
Parent(s): 349c15b
debug
Browse files- generic_ner.py +45 -0
generic_ner.py
CHANGED
|
@@ -272,6 +272,49 @@ def conflicting_context(comp_entity, target_entity):
|
|
| 272 |
return False # No conflict
|
| 273 |
|
| 274 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
def postprocess_entities(entities):
|
| 276 |
# Step 1: Filter entities with the same text, keeping the one with the most dots in the 'entity' field
|
| 277 |
entity_map = {}
|
|
@@ -294,6 +337,8 @@ def postprocess_entities(entities):
|
|
| 294 |
# Step 2: Attach "comp.function" entities to the closest other entities
|
| 295 |
filtered_entities = attach_comp_to_closest(filtered_entities)
|
| 296 |
|
|
|
|
|
|
|
| 297 |
return filtered_entities
|
| 298 |
|
| 299 |
|
|
|
|
| 272 |
return False # No conflict
|
| 273 |
|
| 274 |
|
| 275 |
+
def extract_name_from_text(text, partial_name):
    """
    Return the leading part of ``text`` up to and including ``partial_name``.

    The partial name is looked up case-insensitively and is treated as a
    literal string (no regex metacharacters are interpreted). The function
    assumes the full name starts at the beginning of ``text`` and ends where
    the partial name ends, so everything after the first occurrence of the
    partial name is dropped.

    Args:
        text: The entity text that is expected to contain the full name.
        partial_name: The fragment of the name to look for in ``text``.

    Returns:
        ``text`` cut right after the first occurrence of ``partial_name``,
        with surrounding whitespace stripped. If ``partial_name`` does not
        occur in ``text``, the whole stripped ``text`` is returned as a
        fallback. An empty ``partial_name`` matches at position 0, so the
        result is an empty string.
    """
    import re  # local import so the function does not rely on file-level imports

    # Search the original text directly so that the match offsets index
    # ``text`` itself. Searching a lowercased copy and reusing its offsets is
    # wrong whenever lower() changes the string length (e.g. "İ" becomes two
    # code points), which would shift the slice past the end of the name.
    match = re.search(re.escape(partial_name), text, flags=re.IGNORECASE)

    if match is None:
        # If not found, return the original text (as a fallback)
        return text.strip()

    # Keep everything from the start of the text up to the end of the match.
    return text[: match.end()].strip()
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
def repair_names_in_entities(entities):
    """
    Replace partial person names with the fuller name found in the entity text.

    For every entity whose "entity" label contains "pers" and whose "name"
    occurs (case-insensitively) inside its "text", the "name" value is
    replaced by the result of ``extract_name_from_text`` (e.g. a name of
    'Washington' attached to the text 'George Washington').

    Entities that lack a string "name" or "text", have an empty name, or have
    no "entity" label are left untouched instead of raising.

    Args:
        entities: Iterable of entity dicts; the keys read here are "name",
            "entity" and "text".

    Returns:
        The same ``entities`` object; matching dicts are modified in place.
    """
    for entity in entities:
        name = entity.get("name")
        text = entity.get("text")

        # Skip records without a usable name/text pair; calling .lower() on a
        # missing or None value would otherwise abort the whole pass.
        if not isinstance(name, str) or not isinstance(text, str) or not name:
            continue

        # Only person entities are repaired; a missing label counts as "not a person".
        if "pers" not in (entity.get("entity") or ""):
            continue

        # Check if the attached name is part of the entity's text
        if name.lower() in text.lower():
            # Replace the partial name with the full name taken from the text
            entity["name"] = extract_name_from_text(text, name)

    return entities
|
| 316 |
+
|
| 317 |
+
|
| 318 |
def postprocess_entities(entities):
|
| 319 |
# Step 1: Filter entities with the same text, keeping the one with the most dots in the 'entity' field
|
| 320 |
entity_map = {}
|
|
|
|
| 337 |
# Step 2: Attach "comp.function" entities to the closest other entities
|
| 338 |
filtered_entities = attach_comp_to_closest(filtered_entities)
|
| 339 |
|
| 340 |
+
# Step 3: Repair person names on the filtered list (not the raw input), so the results of Steps 1-2 are kept
filtered_entities = repair_names_in_entities(filtered_entities)
|
| 341 |
+
|
| 342 |
return filtered_entities
|
| 343 |
|
| 344 |
|