emanuelaboros committed on
Commit
2d1ae5b
·
1 Parent(s): ba34bb6

add comp back

Browse files
Files changed (1) hide show
  1. generic_ner.py +10 -21
generic_ner.py CHANGED
@@ -490,39 +490,28 @@ def refine_entities_with_coarse(all_entities, coarse_entities):
490
  """
491
  Looks through all entities and refines them based on the coarse entities.
492
  If a surface match is found in the coarse entities and the types match,
493
- the entity with the higher confidence_ner is kept.
494
  """
495
- refined_entities = []
496
-
497
  # Create a dictionary for coarse entities based on surface and type for quick lookup
498
  coarse_lookup = {}
499
  for coarse_entity in coarse_entities:
500
- key = (coarse_entity["surface"], coarse_entity["type"])
501
  coarse_lookup[key] = coarse_entity
502
 
503
  # Iterate through all entities and compare with the coarse entities
504
  for entity in all_entities:
505
- key = (
506
- entity["surface"],
507
- entity["type"].split(".")[0],
508
- ) # Use the coarse type for comparison
509
 
510
  if key in coarse_lookup:
511
- # If the types match, compare confidence_ner and keep the one with the higher confidence
512
  coarse_entity = coarse_lookup[key]
513
- if entity["confidence_ner"] > coarse_entity["confidence_ner"]:
514
- refined_entities.append(
515
- entity
516
- ) # Keep the current entity with higher confidence
517
- else:
518
- refined_entities.append(
519
- coarse_entity
520
- ) # Keep the coarse entity with higher confidence
521
- else:
522
- # If no match in coarse, just add the entity to refined entities
523
- refined_entities.append(entity)
524
 
525
- return refined_entities
526
 
527
 
528
  def remove_trailing_stopwords(entities):
 
490
  """
491
  Looks through all entities and refines them based on the coarse entities.
492
  If a surface match is found in the coarse entities and the types match,
493
+ the entity's confidence_ner and type are overwritten with the coarse entity's values when the coarse entity has the higher confidence_ner.
494
  """
 
 
495
  # Create a dictionary for coarse entities based on surface and type for quick lookup
496
  coarse_lookup = {}
497
  for coarse_entity in coarse_entities:
498
+ key = (coarse_entity["surface"], coarse_entity["type"].split(".")[0])
499
  coarse_lookup[key] = coarse_entity
500
 
501
  # Iterate through all entities and compare with the coarse entities
502
  for entity in all_entities:
503
+ key = (entity["surface"], entity["type"].split(".")[0]) # Use the coarse type for comparison
 
 
 
504
 
505
  if key in coarse_lookup:
 
506
  coarse_entity = coarse_lookup[key]
507
+ # If a match is found and the coarse entity is more confident, adopt its confidence_ner and type
508
+ if entity["confidence_ner"] < coarse_entity["confidence_ner"]:
509
+ entity["confidence_ner"] = coarse_entity["confidence_ner"]
510
+ entity["type"] = coarse_entity["type"] # Update the type if the confidence is higher
511
+
512
+ # Entities are modified in place, so no separate refined_entities list is needed
513
+ return all_entities
 
 
 
 
514
 
 
515
 
516
 
517
  def remove_trailing_stopwords(entities):