Commit
·
adb47ca
1
Parent(s):
7c8b752
add comp back
Browse files- generic_ner.py +11 -12
generic_ner.py
CHANGED
|
@@ -263,7 +263,8 @@ def conflicting_context(comp_entity, target_entity):
|
|
| 263 |
def extract_name_from_text(text, partial_name):
|
| 264 |
"""
|
| 265 |
Extracts the full name from the entity's text based on the partial name.
|
| 266 |
-
This function
|
|
|
|
| 267 |
"""
|
| 268 |
text_lower = text.lower()
|
| 269 |
partial_name_lower = partial_name.lower()
|
|
@@ -281,26 +282,24 @@ def extract_name_from_text(text, partial_name):
|
|
| 281 |
# Initialize full name with the partial name
|
| 282 |
full_name = [word]
|
| 283 |
|
| 284 |
-
# Check previous words to
|
| 285 |
j = i - 1
|
| 286 |
-
while j >= 0
|
| 287 |
-
|
|
|
|
|
|
|
| 288 |
j -= 1
|
| 289 |
|
| 290 |
-
#
|
| 291 |
-
|
| 292 |
-
while k < len(words) and words[k][0].isupper():
|
| 293 |
-
full_name.append(words[k])
|
| 294 |
-
k += 1
|
| 295 |
|
| 296 |
-
return
|
| 297 |
-
full_name
|
| 298 |
-
).strip() # Join the words to form the full name
|
| 299 |
|
| 300 |
# If not found, return the original text (as a fallback)
|
| 301 |
return text.strip()
|
| 302 |
|
| 303 |
|
|
|
|
| 304 |
def repair_names_in_entities(entities):
|
| 305 |
"""
|
| 306 |
This function repairs the names in the entities by extracting the full name
|
|
|
|
| 263 |
def extract_name_from_text(text, partial_name):
|
| 264 |
"""
|
| 265 |
Extracts the full name from the entity's text based on the partial name.
|
| 266 |
+
This function starts from the partial name, scans backward to the nearest preceding capitalized word,
|
| 267 |
+
and includes all words from that point up to the partial name, including any lowercase words in between.
|
| 268 |
"""
|
| 269 |
text_lower = text.lower()
|
| 270 |
partial_name_lower = partial_name.lower()
|
|
|
|
| 282 |
# Initialize full name with the partial name
|
| 283 |
full_name = [word]
|
| 284 |
|
| 285 |
+
# Scan backward for the nearest preceding capitalized word; any lowercase words between it and the partial name are included
|
| 286 |
j = i - 1
|
| 287 |
+
while j >= 0:
|
| 288 |
+
# Stop at the first capitalized word encountered while scanning backward
|
| 289 |
+
if words[j][0].isupper():
|
| 290 |
+
break
|
| 291 |
j -= 1
|
| 292 |
|
| 293 |
+
# Take every word from that capitalized word through the partial name; if none was found (j < 0), keep only the partial name
|
| 294 |
+
full_name = words[j:i+1] if j >= 0 else full_name
|
|
|
|
|
|
|
|
|
|
| 295 |
|
| 296 |
+
return ' '.join(full_name).strip() # Join the words to form the full name
|
|
|
|
|
|
|
| 297 |
|
| 298 |
# If not found, return the original text (as a fallback)
|
| 299 |
return text.strip()
|
| 300 |
|
| 301 |
|
| 302 |
+
|
| 303 |
def repair_names_in_entities(entities):
|
| 304 |
"""
|
| 305 |
This function repairs the names in the entities by extracting the full name
|