import difflib import html as _h def _word_diff(original: str, compressed: str) -> tuple[str, str]: """ Word-level SequenceMatcher diff. Returns (annotated_original_html, annotated_compressed_html). Colour key: original — red strikethrough = dropped original — plain = survived unchanged compressed — amber = rewritten (replaced) compressed — green = inserted (rare; model added a connector word) compressed — plain = survived unchanged """ orig_words = original.split() comp_words = compressed.split() matcher = difflib.SequenceMatcher(None, orig_words, comp_words, autojunk=False) orig_parts: list[str] = [] comp_parts: list[str] = [] for tag, i1, i2, j1, j2 in matcher.get_opcodes(): ow = _h.escape(" ".join(orig_words[i1:i2])) cw = _h.escape(" ".join(comp_words[j1:j2])) if tag == "equal": orig_parts.append(ow) comp_parts.append(cw) elif tag == "delete": orig_parts.append( f'{ow}' ) elif tag == "insert": comp_parts.append( f'{cw}' ) elif tag == "replace": orig_parts.append( f'{ow}' ) comp_parts.append( f'{cw}' ) return " ".join(orig_parts), " ".join(comp_parts) def render_diff_html(record: dict) -> str: """Build a self-contained side-by-side diff HTML block for a compression run.""" original = record.get("input_text", "") compressed = record.get("output_text", "") if not original or not compressed: return "" orig_html, comp_html = _word_diff(original, compressed) model = _h.escape(record.get("model", "—")) tokenizer = _h.escape(record.get("tokenizer", "—")) ts = _h.escape(record.get("timestamp", "—")) in_tok = record.get("input_tokens", "—") out_tok = record.get("output_tokens", "—") target_tok = record.get("target_tokens", "—") ratio = record.get("compression_ratio", 0) quality = record.get("quality_score", 0) duration = record.get("duration_ms", "—") run_id = record.get("id", "—") feedback_val = record.get("feedback") feedback_note = _h.escape(record.get("feedback_comment") or "") # Build optional feedback block if feedback_val is not None: badge_bg = "#f0fdf4" if feedback_val == 1 else "#fef2f2" badge_color = "#15803d" if feedback_val == 1 else "#b91c1c" badge_text = "👍 Helpful" if feedback_val == 1 else "👎 Not helpful" feedback_block = ( f'