"""Rendering helpers: entity-highlighted text, token chips, word cloud, charts.""" from __future__ import annotations import html import config def _hex_to_rgba(hex_color: str, alpha: float) -> str: """'#2a9d8f' + alpha -> 'rgba(42,157,143,0.83)'.""" h = hex_color.lstrip("#") r, g, b = (int(h[i:i + 2], 16) for i in (0, 2, 4)) return f"rgba({r},{g},{b},{alpha:.2f})" # --- Confidence heatmap ramp: low (red) -> mid (orange) -> high (teal) ------- _RAMP_LO = (214, 40, 40) # low confidence _RAMP_MID = (244, 162, 97) # mid _RAMP_HI = (42, 157, 143) # high confidence def _lerp(a: tuple, b: tuple, t: float) -> tuple: return tuple(round(a[i] + (b[i] - a[i]) * t) for i in range(3)) def _conf_color(t: float) -> str: """0..1 -> rgb on the red->orange->teal ramp.""" t = max(0.0, min(1.0, t)) r, g, b = (_lerp(_RAMP_LO, _RAMP_MID, t / 0.5) if t < 0.5 else _lerp(_RAMP_MID, _RAMP_HI, (t - 0.5) / 0.5)) return f"rgb({r},{g},{b})" def _stretch(conf: float, lo: float, hi: float) -> float: """Contrast-stretch a confidence into 0..1 across the observed [lo, hi].""" if hi - lo < 1e-6: return 1.0 return (conf - lo) / (hi - lo) def _conf_legend(lo: float, hi: float) -> str: grad = (f"linear-gradient(to right, rgb{_RAMP_LO}, rgb{_RAMP_MID}, rgb{_RAMP_HI})") return ( '
' f'low ({lo:.0%})' f'' f'high ({hi:.0%}) — colour stretched across this CV' '
' ) def _legend() -> str: items = [] for t in config.ENTITY_TYPES: items.append( f'' f'{config.ENTITY_LABELS[t]}' ) return '
' + "".join(items) + "
" def render_entities_html(text: str, entities: list[dict], shade_by_conf: bool = False) -> str: """Original text with entity spans wrapped in coloured marks. When ``shade_by_conf`` is set, each mark's background opacity reflects the model's mean confidence for that entity, and the % is shown in the tooltip. """ ents = sorted((e for e in entities if e["type"] in config.ENTITY_COLORS), key=lambda e: e["start"]) cvals = [e.get("conf", 1.0) for e in ents] or [1.0] lo, hi = min(cvals), max(cvals) out, cursor = [], 0 for e in ents: if e["start"] < cursor: # skip any overlap defensively continue out.append(html.escape(text[cursor:e["start"]])) hex_color = config.ENTITY_COLORS[e["type"]] label = config.ENTITY_LABELS[e["type"]] conf = e.get("conf", 1.0) if shade_by_conf: bg = _conf_color(_stretch(conf, lo, hi)) title = f"{label} · {conf:.0%} confidence" else: bg = hex_color title = label out.append( f'' f'{html.escape(text[e["start"]:e["end"]])}' f' {label}' ) cursor = e["end"] out.append(html.escape(text[cursor:])) body = "".join(out).replace("\n", "
") return ((_conf_legend(lo, hi) if shade_by_conf else _legend()) + f'
{body}
') def render_tokens_html(tokens: list[dict], limit: int = 400, shade_by_conf: bool = False) -> str: """Sub-word token chips, coloured by predicted label — the 'tokenization view'. When ``shade_by_conf`` is set, each chip's background opacity reflects the model's confidence in that token's label (low-confidence chips fade out), and the exact % shows on hover. """ shown = tokens[:limit] cvals = [t.get("conf", 1.0) for t in shown] or [1.0] lo, hi = min(cvals), max(cvals) chips = [] for t in shown: txt = html.escape(t["text"]) or "·" conf = t.get("conf", 1.0) title = f"{t['label']} · {conf:.0%}" if shade_by_conf: # Pure confidence heatmap: every token coloured on the ramp. style = f"background:{_conf_color(_stretch(conf, lo, hi))};color:#fff" elif t["type"] in config.ENTITY_COLORS: style = f"background:{config.ENTITY_COLORS[t['type']]};color:#fff" else: style = "background:#eee;color:#555" chips.append( f'{txt}' ) more = "" if len(tokens) <= limit else f'
… +{len(tokens)-limit} more tokens
' return ((_conf_legend(lo, hi) if shade_by_conf else _legend()) + f'
{"".join(chips)}{more}
') def wordcloud_figure(freq: dict, title: str = ""): """Return a matplotlib Figure for a frequency dict, or None if unavailable.""" if not freq: return None try: from wordcloud import WordCloud import matplotlib.pyplot as plt except ModuleNotFoundError: return None wc = WordCloud(width=900, height=400, background_color="white", colormap="viridis", prefer_horizontal=0.9) wc.generate_from_frequencies(freq) fig, ax = plt.subplots(figsize=(9, 4)) ax.imshow(wc, interpolation="bilinear") ax.axis("off") if title: ax.set_title(title) fig.tight_layout() return fig def top_bar_figure(counter, title: str, color: str, top_n: int = 15): """Horizontal bar chart of the most common items. Returns a Plotly fig or None.""" if not counter: return None try: import plotly.graph_objects as go except ModuleNotFoundError: return None items = counter.most_common(top_n)[::-1] labels = [k for k, _ in items] values = [v for _, v in items] fig = go.Figure(go.Bar(x=values, y=labels, orientation="h", marker_color=color)) fig.update_layout(title=title, height=max(300, 28 * len(items) + 80), margin=dict(l=10, r=10, t=40, b=10)) return fig