Spaces:
Sleeping
Sleeping
Fix pruning logic to remove stale elements within LCA subtree
Browse files- core_cleaner.py +37 -3
core_cleaner.py
CHANGED
|
@@ -466,24 +466,58 @@ class XMLCleanerCore:
|
|
| 466 |
if not active_lca: return 0
|
| 467 |
|
| 468 |
stale_set = set(stale_elements)
|
|
|
|
| 469 |
removed_count = 0
|
| 470 |
current = active_lca
|
| 471 |
|
| 472 |
-
# 2.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 473 |
while current is not None:
|
| 474 |
parent = parent_map.get(current)
|
| 475 |
if not parent: break
|
| 476 |
|
| 477 |
siblings = [child for child in parent if child != current]
|
| 478 |
for sibling in siblings:
|
| 479 |
-
# If sibling tree has stale elements
|
| 480 |
-
# Simplified: If sibling is strictly in stale list or contains them
|
| 481 |
if self._subtree_has_stale(sibling, stale_set):
|
| 482 |
removed_count += len(list(sibling.iter()))
|
| 483 |
parent.remove(sibling)
|
| 484 |
|
| 485 |
current = parent
|
| 486 |
return removed_count
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 487 |
|
| 488 |
def _subtree_has_stale(self, node, stale_set):
|
| 489 |
for x in node.iter():
|
|
|
|
| 466 |
if not active_lca: return 0
|
| 467 |
|
| 468 |
stale_set = set(stale_elements)
|
| 469 |
+
active_set = set(active_elements)
|
| 470 |
removed_count = 0
|
| 471 |
current = active_lca
|
| 472 |
|
| 473 |
+
# 2. Prune stale descendants within LCA subtree
|
| 474 |
+
# Remove stale elements that are descendants of LCA but not ancestors of any active element
|
| 475 |
+
removed_count += self._prune_stale_descendants(active_lca, active_set, stale_set, parent_map)
|
| 476 |
+
|
| 477 |
+
# 3. Traverse Up and Prune Siblings
|
| 478 |
while current is not None:
|
| 479 |
parent = parent_map.get(current)
|
| 480 |
if not parent: break
|
| 481 |
|
| 482 |
siblings = [child for child in parent if child != current]
|
| 483 |
for sibling in siblings:
|
| 484 |
+
# If sibling tree has stale elements, remove the entire sibling subtree
|
|
|
|
| 485 |
if self._subtree_has_stale(sibling, stale_set):
|
| 486 |
removed_count += len(list(sibling.iter()))
|
| 487 |
parent.remove(sibling)
|
| 488 |
|
| 489 |
current = parent
|
| 490 |
return removed_count
|
| 491 |
+
|
| 492 |
+
def _prune_stale_descendants(self, node, active_set, stale_set, parent_map):
    """Remove stale child subtrees of ``node`` that hold no active element.

    Each direct child of ``node`` is handled in one of three ways:

    * The child, or something beneath it, is in ``active_set``: the child
      is kept and the same pruning is applied recursively inside it.
    * The child subtree has no active element and
      ``self._subtree_has_stale`` reports it as stale: the whole subtree
      is detached from ``node``.
    * Otherwise the child is left untouched and is not descended into.

    Args:
        node: Element whose children are examined; mutated in place.
        active_set: Set of elements that must be preserved.
        stale_set: Set of elements passed to ``self._subtree_has_stale``.
        parent_map: Only forwarded to the recursive calls; it is neither
            read nor updated by this method.

    Returns:
        Total number of elements removed, where every detached subtree
        contributes the number of elements yielded by its ``iter()``.
    """
    pruned = 0

    # Work on a snapshot: node.remove() below mutates the live child list.
    for branch in tuple(node):
        keeps_active = branch in active_set or self._subtree_has_active(branch, active_set)

        if keeps_active:
            # Something in here must survive, so only prune deeper down.
            pruned += self._prune_stale_descendants(branch, active_set, stale_set, parent_map)
            continue

        if not self._subtree_has_stale(branch, stale_set):
            # Neither active nor stale: leave this subtree as it is.
            continue

        # No active element inside and flagged stale: drop the whole subtree.
        pruned += sum(1 for _ in branch.iter())
        node.remove(branch)

    return pruned
|
| 514 |
+
|
| 515 |
+
def _subtree_has_active(self, node, active_set):
|
| 516 |
+
"""Check if subtree contains any active elements"""
|
| 517 |
+
for elem in node.iter():
|
| 518 |
+
if elem in active_set:
|
| 519 |
+
return True
|
| 520 |
+
return False
|
| 521 |
|
| 522 |
def _subtree_has_stale(self, node, stale_set):
|
| 523 |
for x in node.iter():
|