Suhasdev committed on
Commit
e235e41
·
1 Parent(s): ba6e49b

Fix pruning logic to remove stale elements within LCA subtree

Browse files
Files changed (1) hide show
  1. core_cleaner.py +37 -3
core_cleaner.py CHANGED
@@ -466,24 +466,58 @@ class XMLCleanerCore:
466
  if not active_lca: return 0
467
 
468
  stale_set = set(stale_elements)
 
469
  removed_count = 0
470
  current = active_lca
471
 
472
- # 2. Traverse Up and Prune Siblings
 
 
 
 
473
  while current is not None:
474
  parent = parent_map.get(current)
475
  if not parent: break
476
 
477
  siblings = [child for child in parent if child != current]
478
  for sibling in siblings:
479
- # If sibling tree has stale elements?
480
- # Simplified: If sibling is strictly in stale list or contains them
481
  if self._subtree_has_stale(sibling, stale_set):
482
  removed_count += len(list(sibling.iter()))
483
  parent.remove(sibling)
484
 
485
  current = parent
486
  return removed_count
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
487
 
488
  def _subtree_has_stale(self, node, stale_set):
489
  for x in node.iter():
 
466
  if not active_lca: return 0
467
 
468
  stale_set = set(stale_elements)
469
+ active_set = set(active_elements)
470
  removed_count = 0
471
  current = active_lca
472
 
473
+ # 2. Prune stale descendants within LCA subtree
474
+ # Remove stale elements that are descendants of LCA but not ancestors of any active element
475
+ removed_count += self._prune_stale_descendants(active_lca, active_set, stale_set, parent_map)
476
+
477
+ # 3. Traverse Up and Prune Siblings
478
  while current is not None:
479
  parent = parent_map.get(current)
480
  if not parent: break
481
 
482
  siblings = [child for child in parent if child != current]
483
  for sibling in siblings:
484
+ # If sibling tree has stale elements, remove the entire sibling subtree
 
485
  if self._subtree_has_stale(sibling, stale_set):
486
  removed_count += len(list(sibling.iter()))
487
  parent.remove(sibling)
488
 
489
  current = parent
490
  return removed_count
491
+
492
+ def _prune_stale_descendants(self, node, active_set, stale_set, parent_map):
493
+ """Prune stale elements that are descendants of node but not needed for active elements"""
494
+ removed_count = 0
495
+
496
+ # Get all children of the current node (create a copy to avoid modification during iteration)
497
+ children = list(node)
498
+
499
+ for child in children:
500
+ # Check if this child subtree contains any active elements
501
+ has_active = child in active_set or self._subtree_has_active(child, active_set)
502
+
503
+ if has_active:
504
+ # This subtree has active elements, recursively prune within it
505
+ removed_count += self._prune_stale_descendants(child, active_set, stale_set, parent_map)
506
+ else:
507
+ # This subtree has no active elements
508
+ # Remove if it contains stale elements (the entire subtree is stale)
509
+ if self._subtree_has_stale(child, stale_set):
510
+ removed_count += len(list(child.iter()))
511
+ node.remove(child)
512
+
513
+ return removed_count
514
+
515
+ def _subtree_has_active(self, node, active_set):
516
+ """Check if subtree contains any active elements"""
517
+ for elem in node.iter():
518
+ if elem in active_set:
519
+ return True
520
+ return False
521
 
522
  def _subtree_has_stale(self, node, stale_set):
523
  for x in node.iter():