Commit
·
862237e
1
Parent(s):
95747e4
add comp back
Browse files- generic_ner.py +18 -26
generic_ner.py
CHANGED
|
@@ -297,29 +297,30 @@ def attach_comp_to_closest(entities):
|
|
| 297 |
closest_entity = None
|
| 298 |
min_distance = float("inf")
|
| 299 |
|
| 300 |
-
# Find the closest non-"comp
|
| 301 |
for other_entity in other_entities:
|
| 302 |
-
distance
|
| 303 |
-
|
| 304 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
|
| 306 |
-
# Ensure
|
| 307 |
if (
|
| 308 |
distance < min_distance
|
| 309 |
and other_entity["type"].split(".")[0] in valid_entity_types
|
| 310 |
):
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
min_distance = distance
|
| 314 |
-
closest_entity = other_entity
|
| 315 |
|
| 316 |
-
# Attach the "comp.function"
|
| 317 |
if closest_entity:
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
] # Attach the text using the suffix as the key
|
| 323 |
|
| 324 |
return other_entities
|
| 325 |
|
|
@@ -329,21 +330,12 @@ def conflicting_context(comp_entity, target_entity):
|
|
| 329 |
Determines if there is a conflict between the comp_entity and the target entity.
|
| 330 |
Prevents incorrect name and function attachments by using a rule-based approach.
|
| 331 |
"""
|
| 332 |
-
|
| 333 |
-
target_text = target_entity["surface"].lower()
|
| 334 |
-
|
| 335 |
-
# Case 1: Check if the comp.name is already part of the entity's text.
|
| 336 |
-
# if "pers" in target_entity["type"] and comp_entity["type"].startswith("comp.name"):
|
| 337 |
-
# # Avoid attaching a name if it's already part of the entity's surface text.
|
| 338 |
-
# if comp_text in target_text:
|
| 339 |
-
# return True # Conflict: Name is already part of the target entity's text
|
| 340 |
-
|
| 341 |
-
# Case 2: Check for correct function attachment to person or organization entities
|
| 342 |
if comp_entity["type"].startswith("comp.function"):
|
| 343 |
if not ("pers" in target_entity["type"] or "org" in target_entity["type"]):
|
| 344 |
return True # Conflict: Function should only attach to persons or organizations
|
| 345 |
|
| 346 |
-
# Case
|
| 347 |
if "loc" in target_entity["type"]:
|
| 348 |
return True # Conflict: comp.* entities should not attach to locations or similar types
|
| 349 |
|
|
|
|
| 297 |
closest_entity = None
|
| 298 |
min_distance = float("inf")
|
| 299 |
|
| 300 |
+
# Find the closest non-"comp" entity that is valid for attaching
|
| 301 |
for other_entity in other_entities:
|
| 302 |
+
# Calculate distance between the comp entity and the other entity
|
| 303 |
+
if comp_entity["lOffset"] > other_entity["rOffset"]:
|
| 304 |
+
distance = comp_entity["lOffset"] - other_entity["rOffset"]
|
| 305 |
+
elif comp_entity["rOffset"] < other_entity["lOffset"]:
|
| 306 |
+
distance = other_entity["lOffset"] - comp_entity["rOffset"]
|
| 307 |
+
else:
|
| 308 |
+
distance = 0 # They overlap or touch
|
| 309 |
|
| 310 |
+
# Ensure the entity type is valid and check for minimal distance
|
| 311 |
if (
|
| 312 |
distance < min_distance
|
| 313 |
and other_entity["type"].split(".")[0] in valid_entity_types
|
| 314 |
):
|
| 315 |
+
min_distance = distance
|
| 316 |
+
closest_entity = other_entity
|
|
|
|
|
|
|
| 317 |
|
| 318 |
+
# Attach the "comp.function" or "comp.name" if a valid entity is found
|
| 319 |
if closest_entity:
|
| 320 |
+
suffix = comp_entity["type"].split(".")[
|
| 321 |
+
-1
|
| 322 |
+
] # Extract the suffix (e.g., 'name', 'function')
|
| 323 |
+
closest_entity[suffix] = comp_entity["surface"] # Attach the text
|
|
|
|
| 324 |
|
| 325 |
return other_entities
|
| 326 |
|
|
|
|
| 330 |
Determines if there is a conflict between the comp_entity and the target entity.
|
| 331 |
Prevents incorrect name and function attachments by using a rule-based approach.
|
| 332 |
"""
|
| 333 |
+
# Case 1: Check for correct function attachment to person or organization entities
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
if comp_entity["type"].startswith("comp.function"):
|
| 335 |
if not ("pers" in target_entity["type"] or "org" in target_entity["type"]):
|
| 336 |
return True # Conflict: Function should only attach to persons or organizations
|
| 337 |
|
| 338 |
+
# Case 2: Avoid attaching comp.* entities to non-person, non-organization types (like locations)
|
| 339 |
if "loc" in target_entity["type"]:
|
| 340 |
return True # Conflict: comp.* entities should not attach to locations or similar types
|
| 341 |
|