emanuelaboros committed on
Commit
7c8b752
·
1 Parent(s): dfafcac

add comp back

Browse files
Files changed (1) hide show
  1. generic_ner.py +26 -7
generic_ner.py CHANGED
@@ -263,7 +263,7 @@ def conflicting_context(comp_entity, target_entity):
263
  def extract_name_from_text(text, partial_name):
264
  """
265
  Extracts the full name from the entity's text based on the partial name.
266
- This function assumes that the partial name is contained within the full name in the text.
267
  """
268
  text_lower = text.lower()
269
  partial_name_lower = partial_name.lower()
@@ -271,12 +271,31 @@ def extract_name_from_text(text, partial_name):
271
  # Find the position of the partial name in the text
272
  name_start_idx = text_lower.find(partial_name_lower)
273
 
274
- # Assuming the full name is everything before or after the partial name
275
- # Here, we assume the name is likely at the beginning of the sentence
276
  if name_start_idx != -1:
277
- # Extract the part of the text containing the full name
278
- full_name = text[: name_start_idx + len(partial_name)]
279
- return full_name.strip() # Return the full name, trimming any spaces
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
 
281
  # If not found, return the original text (as a fallback)
282
  return text.strip()
@@ -350,7 +369,7 @@ def postprocess_entities(entities):
350
  # Step 2: Attach "comp.function" entities to the closest other entities
351
  filtered_entities = attach_comp_to_closest(filtered_entities)
352
 
353
- # filtered_entities = repair_names_in_entities(filtered_entities)
354
 
355
  # Step 3: Remove entities that are not useful for NEL
356
  # filtered_entities = clean_coarse_entities(filtered_entities)
 
263
  def extract_name_from_text(text, partial_name):
264
  """
265
  Extracts the full name from the entity's text based on the partial name.
266
+ This function assumes that the partial name is part of a full name, which starts with capitalized letters.
267
  """
268
  text_lower = text.lower()
269
  partial_name_lower = partial_name.lower()
 
271
  # Find the position of the partial name in the text
272
  name_start_idx = text_lower.find(partial_name_lower)
273
 
 
 
274
  if name_start_idx != -1:
275
+ # Split the text into words
276
+ words = text.split()
277
+
278
+ # Find the position of the partial name in the word list
279
+ for i, word in enumerate(words):
280
+ if partial_name_lower in word.lower():
281
+ # Initialize full name with the partial name
282
+ full_name = [word]
283
+
284
+ # Check previous words to see if they start with a capital letter
285
+ j = i - 1
286
+ while j >= 0 and words[j][0].isupper():
287
+ full_name.insert(0, words[j])
288
+ j -= 1
289
+
290
+ # Check next words to see if they start with a capital letter
291
+ k = i + 1
292
+ while k < len(words) and words[k][0].isupper():
293
+ full_name.append(words[k])
294
+ k += 1
295
+
296
+ return " ".join(
297
+ full_name
298
+ ).strip() # Join the words to form the full name
299
 
300
  # If not found, return the original text (as a fallback)
301
  return text.strip()
 
369
  # Step 2: Attach "comp.function" entities to the closest other entities
370
  filtered_entities = attach_comp_to_closest(filtered_entities)
371
 
372
+ filtered_entities = repair_names_in_entities(filtered_entities)
373
 
374
  # Step 3: Remove entities that are not useful for NEL
375
  # filtered_entities = clean_coarse_entities(filtered_entities)