Commit
·
0a9c7b1
1
Parent(s):
adb47ca
add comp back
Browse files - generic_ner.py +6 -8
generic_ner.py
CHANGED
|
@@ -282,24 +282,22 @@ def extract_name_from_text(text, partial_name):
|
|
| 282 |
# Initialize full name with the partial name
|
| 283 |
full_name = [word]
|
| 284 |
|
| 285 |
-
# Check previous words to find the nearest capitalized word
|
| 286 |
j = i - 1
|
| 287 |
while j >= 0:
|
| 288 |
-
#
|
| 289 |
if words[j][0].isupper():
|
| 290 |
-
|
|
|
|
|
|
|
|
|
|
| 291 |
j -= 1
|
| 292 |
|
| 293 |
-
# Now take all words from that capitalized word to the partial name
|
| 294 |
-
full_name = words[j:i+1] if j >= 0 else full_name
|
| 295 |
-
|
| 296 |
return ' '.join(full_name).strip() # Join the words to form the full name
|
| 297 |
|
| 298 |
# If not found, return the original text (as a fallback)
|
| 299 |
return text.strip()
|
| 300 |
|
| 301 |
-
|
| 302 |
-
|
| 303 |
def repair_names_in_entities(entities):
|
| 304 |
"""
|
| 305 |
This function repairs the names in the entities by extracting the full name
|
|
|
|
| 282 |
# Initialize full name with the partial name
|
| 283 |
full_name = [word]
|
| 284 |
|
| 285 |
+
# Check previous words to find the first capitalized word (include all in between)
|
| 286 |
j = i - 1
|
| 287 |
while j >= 0:
|
| 288 |
+
# Capitalized word: prepend it to the name (no break here, so the scan keeps going backward)
|
| 289 |
if words[j][0].isupper():
|
| 290 |
+
full_name.insert(0, words[j])
|
| 291 |
+
else:
|
| 292 |
+
# Continue adding lowercase words like titles (e.g., 'chancelier')
|
| 293 |
+
full_name.insert(0, words[j])
|
| 294 |
j -= 1
|
| 295 |
|
|
|
|
|
|
|
|
|
|
| 296 |
return ' '.join(full_name).strip() # Join the words to form the full name
|
| 297 |
|
| 298 |
# If not found, return the original text (as a fallback)
|
| 299 |
return text.strip()
|
| 300 |
|
|
|
|
|
|
|
| 301 |
def repair_names_in_entities(entities):
|
| 302 |
"""
|
| 303 |
This function repairs the names in the entities by extracting the full name
|