Spaces:
Sleeping
Sleeping
File size: 3,446 Bytes
37eaffd | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 | from __future__ import annotations
import warnings
from ..models.spans import ResolvedSpan
def _build_nesting_tree(flat_spans: list[ResolvedSpan]) -> list[ResolvedSpan]:
"""
Populate ResolvedSpan.children based on offset containment and return root spans.
Spans are sorted so that outer (longer) spans are processed before inner ones.
Overlapping (non-nesting) spans are skipped with a warning.
"""
# Sort: start asc, then end desc so outer spans come before inner at same start
spans = sorted(flat_spans, key=lambda s: (s.start, -(s.end - s.start)))
# Clear any children left from a previous call
for s in spans:
s.children = []
roots: list[ResolvedSpan] = []
stack: list[ResolvedSpan] = []
for span in spans:
rejected = False
# Pop stack entries that are fully before (or incompatibly overlap) this span
while stack:
top = stack[-1]
if top.start <= span.start and span.end <= top.end:
break # top properly contains span → it's the parent
elif span.start >= top.end:
stack.pop() # span comes after top → pop and continue
else:
# Partial overlap (neither contained nor after) → reject span
warnings.warn(
f"Overlapping spans [{top.start},{top.end}] and "
f"[{span.start},{span.end}] cannot be nested; "
f"skipping <{span.element}> span.",
stacklevel=3,
)
rejected = True
break
if rejected:
continue
if stack:
stack[-1].children.append(span)
else:
roots.append(span)
stack.append(span)
return roots
def _inject_recursive(
text: str,
spans: list[ResolvedSpan],
offset: int,
) -> str:
"""
Insert XML open/close tags for *spans* into *text*.
*offset* is the absolute position of text[0] in the original source, used
to translate span.start/end (absolute) to positions within *text*.
"""
if not spans:
return text
result: list[str] = []
cursor = 0 # relative position within text
for span in sorted(spans, key=lambda s: s.start):
rel_start = span.start - offset
rel_end = span.end - offset
# Text before this span
result.append(text[cursor:rel_start])
# Build tag strings
attrs_str = " ".join(f'{k}="{v}"' for k, v in span.attrs.items())
open_tag = f"<{span.element}" + (f" {attrs_str}" if attrs_str else "") + ">"
close_tag = f"</{span.element}>"
# Recursively inject children inside this span's content
inner = text[rel_start:rel_end]
if span.children:
inner = _inject_recursive(inner, span.children, offset=span.start)
result.append(open_tag)
result.append(inner)
result.append(close_tag)
cursor = rel_end
result.append(text[cursor:])
return "".join(result)
def inject_xml(source: str, spans: list[ResolvedSpan]) -> str:
"""
Insert XML tags into *source* at the positions defined by *spans*.
Nesting is inferred from offset containment via _build_nesting_tree.
"""
if not spans:
return source
root_spans = _build_nesting_tree(spans)
return _inject_recursive(source, root_spans, offset=0)
|