"""Rendering helpers: entity-highlighted text, token chips, word cloud, charts."""
from __future__ import annotations
import html
import config
def _hex_to_rgba(hex_color: str, alpha: float) -> str:
"""'#2a9d8f' + alpha -> 'rgba(42,157,143,0.83)'."""
h = hex_color.lstrip("#")
r, g, b = (int(h[i:i + 2], 16) for i in (0, 2, 4))
return f"rgba({r},{g},{b},{alpha:.2f})"
# --- Confidence heatmap ramp: low (red) -> mid (orange) -> high (teal) -------
# Three colour stops, each an (R, G, B) tuple. _conf_color interpolates
# between neighbouring stops and _conf_legend draws them as a gradient.
_RAMP_LO = (214, 40, 40)  # low confidence (red end of the ramp)
_RAMP_MID = (244, 162, 97)  # mid confidence (orange midpoint)
_RAMP_HI = (42, 157, 143)  # high confidence (teal end of the ramp)
def _lerp(a: tuple, b: tuple, t: float) -> tuple:
return tuple(round(a[i] + (b[i] - a[i]) * t) for i in range(3))
def _conf_color(t: float) -> str:
    """Map a 0..1 value to an ``rgb(...)`` string on the red->orange->teal ramp.

    Values outside 0..1 are clamped first. The lower half of the range blends
    low -> mid, the upper half blends mid -> high.
    """
    clamped = max(0.0, min(1.0, t))
    if clamped < 0.5:
        red, green, blue = _lerp(_RAMP_LO, _RAMP_MID, clamped / 0.5)
    else:
        red, green, blue = _lerp(_RAMP_MID, _RAMP_HI, (clamped - 0.5) / 0.5)
    return f"rgb({red},{green},{blue})"
def _stretch(conf: float, lo: float, hi: float) -> float:
"""Contrast-stretch a confidence into 0..1 across the observed [lo, hi]."""
if hi - lo < 1e-6:
return 1.0
return (conf - lo) / (hi - lo)
def _conf_legend(lo: float, hi: float) -> str:
    """Return the HTML legend shown above a confidence heatmap.

    The legend is a gradient bar built from the three ramp stops, labelled
    with the lowest and highest confidence as whole percentages.

    Args:
        lo: lowest confidence in the rendered view.
        hi: highest confidence in the rendered view.
    """
    grad = (f"linear-gradient(to right, rgb{_RAMP_LO}, rgb{_RAMP_MID}, "
            f"rgb{_RAMP_HI})")
    # NOTE(review): presentation uses inline styles so the fragment renders
    # on its own; if the app ships CSS classes for the legend, confirm and
    # swap them in.
    return (
        '<div style="display:flex;align-items:center;gap:8px;'
        'margin-bottom:8px;font-size:0.85em">'
        f'<span>low ({lo:.0%})</span>'
        '<span style="display:inline-block;width:160px;height:12px;'
        f'border-radius:6px;background:{grad}"></span>'
        f'<span>high ({hi:.0%}) — colour stretched across this CV</span>'
        '</div>'
    )
def _legend() -> str:
    """Return the HTML legend listing every entity type as a coloured chip.

    One chip is emitted per entry of ``config.ENTITY_TYPES``, labelled with
    ``config.ENTITY_LABELS`` and filled with the type's ``config.ENTITY_COLORS``
    colour (a neutral grey when the type has no colour configured).
    """
    # NOTE(review): presentation uses inline styles so the fragment renders
    # on its own; if the app ships CSS classes for the legend, confirm and
    # swap them in.
    items = []
    for t in config.ENTITY_TYPES:
        colour = config.ENTITY_COLORS[t] if t in config.ENTITY_COLORS else "#888"
        items.append(
            f'<span style="background:{colour};color:#fff;padding:2px 8px;'
            'border-radius:10px;margin-right:6px;font-size:0.85em">'
            f'{config.ENTITY_LABELS[t]}</span>'
        )
    return '<div style="margin-bottom:8px">' + "".join(items) + "</div>"
def render_entities_html(text: str, entities: list[dict],
                         shade_by_conf: bool = False) -> str:
    """Return the original text as HTML with entity spans wrapped in marks.

    Entities whose type has no colour in ``config.ENTITY_COLORS`` are ignored,
    and an entity that starts inside an already-rendered one is skipped. All
    text taken from ``text`` is HTML-escaped and newlines become ``<br>``.

    When ``shade_by_conf`` is set, each mark is coloured on the confidence
    ramp (stretched across the confidences seen in this text) instead of by
    entity type, the tooltip shows the confidence as a percentage, and the
    confidence legend replaces the entity-type legend.

    Args:
        text: the source text the entity offsets refer to.
        entities: dicts with ``type``, ``start`` and ``end`` keys and an
            optional ``conf`` (defaults to 1.0).
        shade_by_conf: colour by confidence rather than by entity type.
    """
    ents = sorted((e for e in entities if e["type"] in config.ENTITY_COLORS),
                  key=lambda e: e["start"])
    # Fall back to a single 1.0 so min()/max() work when nothing matched.
    cvals = [e.get("conf", 1.0) for e in ents] or [1.0]
    lo, hi = min(cvals), max(cvals)
    out, cursor = [], 0
    for e in ents:
        if e["start"] < cursor:  # skip any overlap defensively
            continue
        out.append(html.escape(text[cursor:e["start"]]))
        label = config.ENTITY_LABELS[e["type"]]
        conf = e.get("conf", 1.0)
        if shade_by_conf:
            bg = _conf_color(_stretch(conf, lo, hi))
            title = f"{label} · {conf:.0%} confidence"
        else:
            bg = config.ENTITY_COLORS[e["type"]]
            title = label
        # NOTE(review): presentation uses inline styles so the fragment
        # renders on its own; if the app ships CSS classes for entity marks,
        # confirm and swap them in.
        out.append(
            f'<mark style="background:{bg};color:#fff;padding:1px 4px;'
            f'border-radius:4px" title="{html.escape(title)}">'
            f'{html.escape(text[e["start"]:e["end"]])}'
            f' <small style="opacity:0.85">{label}</small></mark>'
        )
        cursor = e["end"]
    out.append(html.escape(text[cursor:]))
    body = "".join(out).replace("\n", "<br>")
    return ((_conf_legend(lo, hi) if shade_by_conf else _legend()) +
            f'<div style="line-height:1.9">{body}</div>')
def render_tokens_html(tokens: list[dict], limit: int = 400,
                       shade_by_conf: bool = False) -> str:
    """Sub-word token chips, coloured by predicted label — the 'tokenization view'.

    Only the first ``limit`` tokens are rendered; when more exist, a trailing
    note says how many were left out. Each chip's tooltip shows the token's
    label and confidence as a percentage.

    When ``shade_by_conf`` is set, every chip is coloured on the confidence
    ramp (stretched across the confidences of the shown tokens) and the
    confidence legend replaces the entity-type legend. Otherwise a chip takes
    its entity type's colour, or a neutral grey when the type has none.

    Args:
        tokens: dicts with ``text``, ``label`` and ``type`` keys and an
            optional ``conf`` (defaults to 1.0).
        limit: maximum number of chips to render.
        shade_by_conf: colour by confidence rather than by entity type.
    """
    shown = tokens[:limit]
    # Fall back to a single 1.0 so min()/max() work for an empty token list.
    cvals = [t.get("conf", 1.0) for t in shown] or [1.0]
    lo, hi = min(cvals), max(cvals)
    chips = []
    for t in shown:
        txt = html.escape(t["text"]) or "·"  # keep empty tokens visible
        conf = t.get("conf", 1.0)
        title = f"{t['label']} · {conf:.0%}"
        if shade_by_conf:
            # Pure confidence heatmap: every token coloured on the ramp.
            style = f"background:{_conf_color(_stretch(conf, lo, hi))};color:#fff"
        elif t["type"] in config.ENTITY_COLORS:
            style = f"background:{config.ENTITY_COLORS[t['type']]};color:#fff"
        else:
            style = "background:#eee;color:#555"
        # NOTE(review): presentation uses inline styles so the fragment
        # renders on its own; if the app ships CSS classes for token chips,
        # confirm and swap them in. The title is escaped because the label
        # goes into an HTML attribute.
        chips.append(
            f'<span style="{style};display:inline-block;padding:2px 6px;'
            f'margin:2px;border-radius:4px;font-family:monospace" '
            f'title="{html.escape(title)}">{txt}</span>'
        )
    if len(tokens) <= limit:
        more = ""
    else:
        more = ('<div style="color:#888;margin-top:6px">'
                f'… +{len(tokens)-limit} more tokens</div>')
    return ((_conf_legend(lo, hi) if shade_by_conf else _legend()) +
            f'<div style="line-height:2">{"".join(chips)}{more}</div>')
def wordcloud_figure(freq: dict, title: str = ""):
    """Draw a word cloud for a frequency mapping.

    Returns a matplotlib Figure, or None when ``freq`` is empty or when the
    ``wordcloud`` / ``matplotlib`` packages cannot be found.
    """
    if not freq:
        return None
    try:
        from wordcloud import WordCloud
        import matplotlib.pyplot as plt
    except ModuleNotFoundError:
        return None

    cloud = WordCloud(
        width=900,
        height=400,
        background_color="white",
        colormap="viridis",
        prefer_horizontal=0.9,
    )
    cloud.generate_from_frequencies(freq)

    figure, axes = plt.subplots(figsize=(9, 4))
    axes.imshow(cloud, interpolation="bilinear")
    axes.axis("off")  # the cloud is an image; ticks and frame add nothing
    if title:
        axes.set_title(title)
    figure.tight_layout()
    return figure
def top_bar_figure(counter, title: str, color: str, top_n: int = 15):
    """Horizontal bar chart of the ``top_n`` most common items in ``counter``.

    Returns a Plotly figure, or None when ``counter`` is empty or Plotly
    cannot be found. The figure height grows with the number of bars.
    """
    if not counter:
        return None
    try:
        import plotly.graph_objects as go
    except ModuleNotFoundError:
        return None

    # Reverse the most-common-first ranking before handing it to Plotly.
    ranked = list(reversed(counter.most_common(top_n)))
    names, counts = [], []
    for name, count in ranked:
        names.append(name)
        counts.append(count)

    bars = go.Bar(x=counts, y=names, orientation="h", marker_color=color)
    chart = go.Figure(bars)
    chart.update_layout(
        title=title,
        height=max(300, 28 * len(ranked) + 80),
        margin={"l": 10, "r": 10, "t": 40, "b": 10},
    )
    return chart