emanuelaboros committed on
Commit
2afe88b
·
1 Parent(s): bc35ffb
Files changed (1) hide show
  1. generic_ner.py +21 -1
generic_ner.py CHANGED
@@ -256,6 +256,22 @@ def postprocess_entities(entities):
256
  return filtered_entities
257
 
258
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  class MultitaskTokenClassificationPipeline(Pipeline):
260
 
261
  def _sanitize_parameters(self, **kwargs):
@@ -381,10 +397,14 @@ class MultitaskTokenClassificationPipeline(Pipeline):
381
  )
382
 
383
  print("After 1:")
 
 
 
 
384
  pprint(all_entities)
385
  # Attach "comp.function" entities to the closest non-"comp.function" entity
386
  all_entities = attach_comp_to_closest(all_entities)
387
- print("After 2:")
388
  pprint(all_entities)
389
  print("\n")
390
  return all_entities
 
256
  return filtered_entities
257
 
258
 
259
def remove_included_entities(entities):
    """Drop entities whose text is contained in a longer same-label entity.

    An entity is removed when another entity with the same "entity" label
    has a strictly longer "text" that contains this entity's "text" as a
    substring. Entities with equal text never eliminate each other, so
    repeated mentions of the same string are all kept.

    Args:
        entities: Iterable of dicts, each providing at least the "entity"
            and "text" keys. Any other keys are passed through untouched.

    Returns:
        A new list with the surviving entities in their original order.
        The input is not modified.
    """
    # Materialize once: the input is scanned repeatedly below.
    entities = list(entities)

    def is_included(entity):
        label = entity["entity"]
        text = entity["text"]
        # Requiring a strictly longer container also rules out comparing an
        # entity with itself or with an equal-text duplicate; without it,
        # two same-text entities would each mark the other as included and
        # both would be dropped.
        return any(
            other["entity"] == label
            and len(other["text"]) > len(text)
            and text in other["text"]
            for other in entities
        )

    return [entity for entity in entities if not is_included(entity)]
273
+
274
+
275
  class MultitaskTokenClassificationPipeline(Pipeline):
276
 
277
  def _sanitize_parameters(self, **kwargs):
 
397
  )
398
 
399
  print("After 1:")
400
+ all_entities = remove_included_entities(all_entities)
401
+ pprint(all_entities)
402
+
403
+ print("After 2:")
404
  pprint(all_entities)
405
  # Attach "comp.function" entities to the closest non-"comp.function" entity
406
  all_entities = attach_comp_to_closest(all_entities)
407
+ print("After 3:")
408
  pprint(all_entities)
409
  print("\n")
410
  return all_entities