Commit
·
734888b
1
Parent(s):
dc71c13
add comp back
Browse files
- generic_ner.py +12 -6
generic_ner.py
CHANGED
|
@@ -360,24 +360,29 @@ def extract_name_from_text(text, partial_name):
|
|
| 360 |
words = tokenize(text)
|
| 361 |
partial_words = partial_name.split()
|
| 362 |
|
| 363 |
-
|
| 364 |
-
|
|
|
|
|
|
|
| 365 |
|
| 366 |
# Find the position of the partial name in the word list
|
| 367 |
for i, word in enumerate(words):
|
| 368 |
-
|
|
|
|
| 369 |
if words[i : i + len(partial_words)] == partial_words:
|
| 370 |
# Initialize full name with the partial name
|
| 371 |
full_name = partial_words[:]
|
| 372 |
|
| 373 |
-
|
|
|
|
| 374 |
|
| 375 |
# Check previous words and only add capitalized words (skip lowercase words)
|
| 376 |
j = i - 1
|
| 377 |
while j >= 0 and words[j][0].isupper():
|
| 378 |
full_name.insert(0, words[j])
|
| 379 |
j -= 1
|
| 380 |
-
|
|
|
|
| 381 |
|
| 382 |
# Return only the full name up to the partial name (ignore words after the name)
|
| 383 |
return " ".join(full_name).strip() # Join the words to form the full name
|
|
@@ -714,7 +719,8 @@ class MultitaskTokenClassificationPipeline(Pipeline):
|
|
| 714 |
# if key not in ["NE-COARSE-LIT"]:
|
| 715 |
all_entities.extend(entities[key])
|
| 716 |
|
| 717 |
-
|
|
|
|
| 718 |
# print("After remove_included_entities:")
|
| 719 |
all_entities = remove_included_entities(all_entities)
|
| 720 |
all_entities = remove_trailing_stopwords(all_entities)
|
|
|
|
| 360 |
words = tokenize(text)
|
| 361 |
partial_words = partial_name.split()
|
| 362 |
|
| 363 |
+
if DEBUG:
|
| 364 |
+
print("text:", text)
|
| 365 |
+
if DEBUG:
|
| 366 |
+
print("partial_name:", partial_name)
|
| 367 |
|
| 368 |
# Find the position of the partial name in the word list
|
| 369 |
for i, word in enumerate(words):
|
| 370 |
+
if DEBUG:
|
| 371 |
+
print(words, "---", words[i : i + len(partial_words)])
|
| 372 |
if words[i : i + len(partial_words)] == partial_words:
|
| 373 |
# Initialize full name with the partial name
|
| 374 |
full_name = partial_words[:]
|
| 375 |
|
| 376 |
+
if DEBUG:
|
| 377 |
+
print("full_name:", full_name)
|
| 378 |
|
| 379 |
# Check previous words and only add capitalized words (skip lowercase words)
|
| 380 |
j = i - 1
|
| 381 |
while j >= 0 and words[j][0].isupper():
|
| 382 |
full_name.insert(0, words[j])
|
| 383 |
j -= 1
|
| 384 |
+
if DEBUG:
|
| 385 |
+
print("full_name:", full_name)
|
| 386 |
|
| 387 |
# Return only the full name up to the partial name (ignore words after the name)
|
| 388 |
return " ".join(full_name).strip() # Join the words to form the full name
|
|
|
|
| 719 |
# if key not in ["NE-COARSE-LIT"]:
|
| 720 |
all_entities.extend(entities[key])
|
| 721 |
|
| 722 |
+
if DEBUG:
|
| 723 |
+
print(all_entities)
|
| 724 |
# print("After remove_included_entities:")
|
| 725 |
all_entities = remove_included_entities(all_entities)
|
| 726 |
all_entities = remove_trailing_stopwords(all_entities)
|