File size: 3,686 Bytes
61d7017 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 | """HTML diff highlighting utilities."""
import difflib
import html
# Visible markers for whitespace differences so they don't become invisible
# inside colored diff spans.
# ␣ (U+2423 OPEN BOX) for regular space
# ↵ (U+21B5 DOWNWARDS ARROW WITH CORNER LEFTWARDS) for newline
# → (U+2192 RIGHTWARDS ARROW) for tab
_WHITESPACE_MARKERS = {
" ": "␣",
"\t": "→",
"\u00a0": "␣", # non-breaking space
}
def _visualize_whitespace(escaped: str) -> str:
"""Replace whitespace chars with visible markers inside a diff span.
Operates on already HTML-escaped text. Newlines are handled separately
by the final `\n` -> `<br>` pass, but we mark them inline too so the
reader sees *where* a newline was inserted/removed before the <br>.
"""
parts: list[str] = []
for ch in escaped:
if ch == "\n":
parts.append(
'<span style="opacity:0.6;">↵</span>\n'
) # marker + real newline (for <br>)
elif ch in _WHITESPACE_MARKERS:
parts.append(f'<span style="opacity:0.6;">{_WHITESPACE_MARKERS[ch]}</span>')
else:
parts.append(ch)
return "".join(parts)
def _render_segment(escaped: str) -> str:
"""Render a diff segment, adding whitespace markers only when the whole
segment is whitespace.
Rationale: a pure-whitespace diff (e.g. a single inserted space) is
otherwise invisible, so we show ␣/↵/→. But when the segment already
contains visible text, the colored background is enough — marking the
incidental spaces would just add noise to word-level edits.
"""
if escaped and escaped.strip() == "":
return _visualize_whitespace(escaped)
return escaped
def highlight_diff(original: str, corrected: str) -> str:
    """Generate HTML highlighting differences between original and corrected text.

    Uses character-level difflib.SequenceMatcher to produce inline HTML
    with strikethrough for deletions and green highlights for insertions.
    Segments that consist only of whitespace are rendered with visible
    markers (␣ for space, → for tab, ↵ for newline) so spacing-only edits
    are perceivable.

    Args:
        original: Original text.
        corrected: Corrected text.

    Returns:
        HTML string with diff highlights. Every slice of user text is
        HTML-escaped before it is embedded, so the result is safe against
        XSS. Newlines are emitted as ``<br>``.
    """
    # Diff the RAW text and escape each slice afterwards. Diffing the
    # escaped text lets the character-level matcher cut through an entity
    # (e.g. "&amp;" vs "&lt;" share "&" and ";"), which produced fragments
    # such as "&" + "amp" + "lt" + ";" that render as garbage. html.escape
    # works per character, so escaping slice by slice yields exactly the
    # same fully escaped text overall.
    matcher = difflib.SequenceMatcher(None, original, corrected)

    del_style = "background:#ffecec;text-decoration:line-through;"
    ins_style = "background:#e6ffec;"

    result_parts: list[str] = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == "equal":
            result_parts.append(f"<span>{html.escape(original[i1:i2])}</span>")
            continue
        # "replace" emits the deleted text followed by the inserted text.
        if tag in ("replace", "delete"):
            removed = _render_segment(html.escape(original[i1:i2]))
            result_parts.append(f'<span style="{del_style}">{removed}</span>')
        if tag in ("replace", "insert"):
            added = _render_segment(html.escape(corrected[j1:j2]))
            result_parts.append(f'<span style="{ins_style}">{added}</span>')

    # Remaining real newlines (in equal spans, or kept next to a ↵ marker
    # inside diff spans) become explicit line breaks.
    return "".join(result_parts).replace("\n", "<br>")
|