Commit
·
7c8b752
1
Parent(s):
dfafcac
add comp back
Browse files- generic_ner.py +26 -7
generic_ner.py
CHANGED
|
@@ -263,7 +263,7 @@ def conflicting_context(comp_entity, target_entity):
|
|
| 263 |
def extract_name_from_text(text, partial_name):
|
| 264 |
"""
|
| 265 |
Extracts the full name from the entity's text based on the partial name.
|
| 266 |
-
This function assumes that the partial name is
|
| 267 |
"""
|
| 268 |
text_lower = text.lower()
|
| 269 |
partial_name_lower = partial_name.lower()
|
|
@@ -271,12 +271,31 @@ def extract_name_from_text(text, partial_name):
|
|
| 271 |
# Find the position of the partial name in the text
|
| 272 |
name_start_idx = text_lower.find(partial_name_lower)
|
| 273 |
|
| 274 |
-
# Assuming the full name is everything before or after the partial name
|
| 275 |
-
# Here, we assume the name is likely at the beginning of the sentence
|
| 276 |
if name_start_idx != -1:
|
| 277 |
-
#
|
| 278 |
-
|
| 279 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
|
| 281 |
# If not found, return the original text (as a fallback)
|
| 282 |
return text.strip()
|
|
@@ -350,7 +369,7 @@ def postprocess_entities(entities):
|
|
| 350 |
# Step 2: Attach "comp.function" entities to the closest other entities
|
| 351 |
filtered_entities = attach_comp_to_closest(filtered_entities)
|
| 352 |
|
| 353 |
-
|
| 354 |
|
| 355 |
# Step 3: Remove entities that are not useful for NEL
|
| 356 |
# filtered_entities = clean_coarse_entities(filtered_entities)
|
|
|
|
| 263 |
def extract_name_from_text(text: str, partial_name: str) -> str:
    """
    Extract the full name surrounding a partial name inside an entity's text.

    The text is split on whitespace and scanned for the first run of tokens
    that contains ``partial_name`` (case-insensitive; the partial name may
    itself span several tokens). That run is then extended to the left and to
    the right for as long as the neighbouring tokens start with an uppercase
    letter, on the assumption that a full name is a sequence of capitalized
    words.

    Args:
        text: The entity text in which to look for the name.
        partial_name: A fragment of the name (one or more words).

    Returns:
        The matched tokens plus their capitalized neighbours, joined by single
        spaces. If the partial name is empty or cannot be found, the original
        text with surrounding whitespace stripped is returned as a fallback.
    """
    words = text.split()
    partial_tokens = partial_name.lower().split()

    # An empty partial name would match any token; treat it as "not found".
    if not partial_tokens:
        return text.strip()

    span = len(partial_tokens)
    needle = " ".join(partial_tokens)
    lowered = [word.lower() for word in words]

    # Slide a window of `span` tokens over the text. For a single-token
    # partial name this is the plain "partial in word" substring check; for a
    # multi-token partial name it matches across token boundaries.
    for start in range(len(words) - span + 1):
        if needle not in " ".join(lowered[start:start + span]):
            continue

        # Extend left while the preceding token starts with a capital letter.
        first = start
        while first > 0 and words[first - 1][0].isupper():
            first -= 1

        # Extend right while the following token starts with a capital letter.
        last = start + span
        while last < len(words) and words[last][0].isupper():
            last += 1

        return " ".join(words[first:last])

    # If not found, return the original text (as a fallback)
    return text.strip()
|
|
|
|
| 369 |
# Step 2: Attach "comp.function" entities to the closest other entities
|
| 370 |
filtered_entities = attach_comp_to_closest(filtered_entities)
|
| 371 |
|
| 372 |
+
filtered_entities = repair_names_in_entities(filtered_entities)
|
| 373 |
|
| 374 |
# Step 3: Remove entities that are not useful for NEL
|
| 375 |
# filtered_entities = clean_coarse_entities(filtered_entities)
|