emanuelaboros committed on
Commit
734888b
·
1 Parent(s): dc71c13

add comp back

Browse files
Files changed (1) hide show
  1. generic_ner.py +12 -6
generic_ner.py CHANGED
@@ -360,24 +360,29 @@ def extract_name_from_text(text, partial_name):
360
  words = tokenize(text)
361
  partial_words = partial_name.split()
362
 
363
- print("text:", text)
364
- print("partial_name:", partial_name)
 
 
365
 
366
  # Find the position of the partial name in the word list
367
  for i, word in enumerate(words):
368
- print(words, "---", words[i : i + len(partial_words)])
 
369
  if words[i : i + len(partial_words)] == partial_words:
370
  # Initialize full name with the partial name
371
  full_name = partial_words[:]
372
 
373
- print("full_name:", full_name)
 
374
 
375
  # Check previous words and only add capitalized words (skip lowercase words)
376
  j = i - 1
377
  while j >= 0 and words[j][0].isupper():
378
  full_name.insert(0, words[j])
379
  j -= 1
380
- print("full_name:", full_name)
 
381
 
382
  # Return only the full name up to the partial name (ignore words after the name)
383
  return " ".join(full_name).strip() # Join the words to form the full name
@@ -714,7 +719,8 @@ class MultitaskTokenClassificationPipeline(Pipeline):
714
  # if key not in ["NE-COARSE-LIT"]:
715
  all_entities.extend(entities[key])
716
 
717
- print(all_entities)
 
718
  # print("After remove_included_entities:")
719
  all_entities = remove_included_entities(all_entities)
720
  all_entities = remove_trailing_stopwords(all_entities)
 
360
  words = tokenize(text)
361
  partial_words = partial_name.split()
362
 
363
+ if DEBUG:
364
+ print("text:", text)
365
+ if DEBUG:
366
+ print("partial_name:", partial_name)
367
 
368
  # Find the position of the partial name in the word list
369
  for i, word in enumerate(words):
370
+ if DEBUG:
371
+ print(words, "---", words[i : i + len(partial_words)])
372
  if words[i : i + len(partial_words)] == partial_words:
373
  # Initialize full name with the partial name
374
  full_name = partial_words[:]
375
 
376
+ if DEBUG:
377
+ print("full_name:", full_name)
378
 
379
  # Check previous words and only add capitalized words (skip lowercase words)
380
  j = i - 1
381
  while j >= 0 and words[j][0].isupper():
382
  full_name.insert(0, words[j])
383
  j -= 1
384
+ if DEBUG:
385
+ print("full_name:", full_name)
386
 
387
  # Return only the full name up to the partial name (ignore words after the name)
388
  return " ".join(full_name).strip() # Join the words to form the full name
 
719
  # if key not in ["NE-COARSE-LIT"]:
720
  all_entities.extend(entities[key])
721
 
722
+ if DEBUG:
723
+ print(all_entities)
724
  # print("After remove_included_entities:")
725
  all_entities = remove_included_entities(all_entities)
726
  all_entities = remove_trailing_stopwords(all_entities)