| """HTML diff highlighting utilities.""" |
|
|
| import difflib |
| import html |
|
|
| |
| |
| |
| |
| |
| _WHITESPACE_MARKERS = { |
| " ": "␣", |
| "\t": "→", |
| "\u00a0": "␣", |
| } |
|
|
|
|
def _visualize_whitespace(escaped: str) -> str:
    """Swap whitespace characters for dimmed, visible marker glyphs.

    The input is expected to be HTML-escaped already; characters that are
    neither a newline nor a key of ``_WHITESPACE_MARKERS`` are copied through
    untouched. A newline is rendered as a dimmed ``↵`` marker *followed by*
    the newline itself, so a later newline-to-``<br>`` conversion still
    produces the line break while the reader can see where it sits.
    """

    def _render_char(char: str) -> str:
        # Newline is checked first: it keeps its own character after the
        # marker, unlike the table-driven replacements below.
        if char == "\n":
            return '<span style="opacity:0.6;">↵</span>\n'
        glyph = _WHITESPACE_MARKERS.get(char)
        if glyph is None:
            return char
        return f'<span style="opacity:0.6;">{glyph}</span>'

    return "".join(map(_render_char, escaped))
|
|
|
|
def _render_segment(escaped: str) -> str:
    """Return the markup for one diff segment.

    Only a non-empty segment made up entirely of whitespace is passed through
    ``_visualize_whitespace``: such a change (for example a single inserted
    space) would otherwise be invisible. A segment that contains any visible
    text is returned unchanged, since its colored background already shows
    the edit and marking the incidental spaces would only add noise.
    """
    # Guard clause: empty input, or anything with non-whitespace content,
    # is returned as-is.
    if not escaped or escaped.strip():
        return escaped
    return _visualize_whitespace(escaped)
|
|
|
|
def highlight_diff(original: str, corrected: str) -> str:
    """Generate HTML highlighting differences between original and corrected text.

    Runs a character-level ``difflib.SequenceMatcher`` over the raw inputs and
    emits inline HTML: deleted text in a struck-through span, inserted text in
    a green span, and unchanged text in a plain ``<span>``. Changed segments
    are passed through ``_render_segment`` so that whitespace-only edits get
    visible markers. Finally every newline in the result is replaced by
    ``<br>``.

    Args:
        original: Original text.
        corrected: Corrected text.

    Returns:
        HTML string with diff highlights. Every piece of text taken from the
        inputs is HTML-escaped before it is embedded in the markup.
    """
    # Diff the raw text and escape each segment afterwards. Diffing the
    # already-escaped strings lets the matcher align on the "&" and ";" that
    # two different entities share (e.g. "&amp;" vs "&lt;") and cut them
    # apart, producing broken entities spread across several spans.
    matcher = difflib.SequenceMatcher(None, original, corrected)

    del_style = "background:#ffecec;text-decoration:line-through;"
    ins_style = "background:#e6ffec;"

    def _styled_span(raw_segment: str, style: str) -> str:
        # Escape first, then let _render_segment decide about whitespace
        # markers; it expects escaped text.
        rendered = _render_segment(html.escape(raw_segment))
        return f'<span style="{style}">{rendered}</span>'

    result_parts: list[str] = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == "equal":
            result_parts.append(f"<span>{html.escape(original[i1:i2])}</span>")
            continue
        # "replace" emits both halves: the removed text, then the added text.
        if tag in ("replace", "delete"):
            result_parts.append(_styled_span(original[i1:i2], del_style))
        if tag in ("replace", "insert"):
            result_parts.append(_styled_span(corrected[j1:j2], ins_style))

    # Newlines (including the ones kept after "↵" markers) become line breaks.
    return "".join(result_parts).replace("\n", "<br>")
|
|