emanuelaboros committed on
Commit
9e65353
·
1 Parent(s): 349c15b
Files changed (1) hide show
  1. generic_ner.py +45 -0
generic_ner.py CHANGED
@@ -272,6 +272,49 @@ def conflicting_context(comp_entity, target_entity):
272
  return False # No conflict
273
 
274
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
  def postprocess_entities(entities):
276
  # Step 1: Filter entities with the same text, keeping the one with the most dots in the 'entity' field
277
  entity_map = {}
@@ -294,6 +337,8 @@ def postprocess_entities(entities):
294
  # Step 2: Attach "comp.function" entities to the closest other entities
295
  filtered_entities = attach_comp_to_closest(filtered_entities)
296
 
 
 
297
  return filtered_entities
298
 
299
 
 
272
  return False # No conflict
273
 
274
 
275
def extract_name_from_text(text, partial_name):
    """
    Extract the full name from an entity's text, given a partial name.

    The full name is taken to be everything from the start of ``text`` up to
    and including the first case-insensitive occurrence of ``partial_name``
    (i.e. the name is assumed to sit at the beginning of the text).

    Args:
        text: The entity's surface text.
        partial_name: A fragment expected to appear inside ``text``.

    Returns:
        The leading portion of ``text`` ending with the matched fragment,
        stripped of surrounding whitespace. If the fragment does not occur,
        the whole ``text`` is returned stripped. An empty ``partial_name``
        matches at position 0, so the result is an empty string.
    """
    # Local import: the file's top-level import block is not visible here.
    import re

    # Search the ORIGINAL text case-insensitively instead of searching a
    # lowercased copy: str.lower() can change the length of a string (for
    # example "İ" lowercases to two code points), so an index found in the
    # lowercased copy may not line up with the original text.
    match = re.search(re.escape(partial_name), text, flags=re.IGNORECASE)
    if match is None:
        # Fallback: the partial name is not in the text at all.
        return text.strip()

    # Keep everything up to the end of the matched fragment.
    return text[: match.end()].strip()
295
+
296
+
297
def repair_names_in_entities(entities):
    """
    Repair partial names attached to person entities.

    For every entity that carries a "name" and whose "entity" label contains
    "pers", the name is replaced by the result of
    ``extract_name_from_text(entity["text"], name)`` whenever the name occurs
    in the entity's text (compared case-insensitively). For example, a name
    of "Washington" on the text "George Washington, president" becomes
    "George Washington".

    Entities are updated in place and the same list is returned. Entities
    without a string "name" or "text", or without an "entity" label, are
    left untouched instead of raising.
    """
    for entity in entities:
        name = entity.get("name")
        text = entity.get("text")
        # Both values are lowercased below, so they must be strings.
        if not isinstance(name, str) or not isinstance(text, str):
            continue

        # A missing or empty label cannot contain "pers".
        label = entity.get("entity") or ""
        if "pers" not in label:
            continue

        # Only repair when the attached name really is part of the text.
        if name.lower() in text.lower():
            # The helper matches case-insensitively, so the name is passed
            # as-is rather than pre-lowercased.
            entity["name"] = extract_name_from_text(text, name)

    return entities
316
+
317
+
318
  def postprocess_entities(entities):
319
  # Step 1: Filter entities with the same text, keeping the one with the most dots in the 'entity' field
320
  entity_map = {}
 
337
  # Step 2: Attach "comp.function" entities to the closest other entities
338
  filtered_entities = attach_comp_to_closest(filtered_entities)
339
 
340
+ filtered_entities = repair_names_in_entities(entities)
341
+
342
  return filtered_entities
343
 
344