Spaces:
Running
Running
| """Rendering helpers: entity-highlighted text, token chips, word cloud, charts.""" | |
| from __future__ import annotations | |
| import html | |
| import config | |
| def _hex_to_rgba(hex_color: str, alpha: float) -> str: | |
| """'#2a9d8f' + alpha -> 'rgba(42,157,143,0.83)'.""" | |
| h = hex_color.lstrip("#") | |
| r, g, b = (int(h[i:i + 2], 16) for i in (0, 2, 4)) | |
| return f"rgba({r},{g},{b},{alpha:.2f})" | |
| # --- Confidence heatmap ramp: low (red) -> mid (orange) -> high (teal) ------- | |
| _RAMP_LO = (214, 40, 40) # low confidence | |
| _RAMP_MID = (244, 162, 97) # mid | |
| _RAMP_HI = (42, 157, 143) # high confidence | |
| def _lerp(a: tuple, b: tuple, t: float) -> tuple: | |
| return tuple(round(a[i] + (b[i] - a[i]) * t) for i in range(3)) | |
def _conf_color(t: float) -> str:
    """Map a 0..1 score to a CSS ``rgb(...)`` colour on the red->orange->teal ramp.

    The score is clamped to [0, 1] first. Scores below 0.5 blend between the
    low and mid stops; scores of 0.5 and above blend between mid and high.
    """
    clamped = max(0.0, min(1.0, t))
    if clamped < 0.5:
        # Lower half of the ramp, rescaled to a 0..1 blend factor.
        red, green, blue = _lerp(_RAMP_LO, _RAMP_MID, clamped / 0.5)
    else:
        # Upper half of the ramp, rescaled to a 0..1 blend factor.
        red, green, blue = _lerp(_RAMP_MID, _RAMP_HI, (clamped - 0.5) / 0.5)
    return f"rgb({red},{green},{blue})"
| def _stretch(conf: float, lo: float, hi: float) -> float: | |
| """Contrast-stretch a confidence into 0..1 across the observed [lo, hi].""" | |
| if hi - lo < 1e-6: | |
| return 1.0 | |
| return (conf - lo) / (hi - lo) | |
def _conf_legend(lo: float, hi: float) -> str:
    """Build the HTML legend for the confidence heatmap.

    The legend is a gradient bar over the three ramp stops, flanked by the
    lowest and highest observed confidences formatted as whole percentages.
    """
    gradient = (f"linear-gradient(to right, rgb{_RAMP_LO}, rgb{_RAMP_MID}, rgb{_RAMP_HI})")
    fragments = [
        '<div style="margin-bottom:10px;font-size:0.8rem;color:#444">',
        f'<span style="margin-right:8px">low ({lo:.0%})</span>',
        f'<span style="display:inline-block;width:180px;height:12px;background:{gradient};',
        'border-radius:3px;vertical-align:middle"></span>',
        f'<span style="margin-left:8px">high ({hi:.0%}) — colour stretched across this CV</span>',
        '</div>',
    ]
    return "".join(fragments)
def _legend() -> str:
    """Build the entity-type colour key: one coloured badge per configured type.

    Types are taken from ``config.ENTITY_TYPES`` in order; each badge uses the
    matching entry of ``config.ENTITY_COLORS`` and ``config.ENTITY_LABELS``.
    """
    badges = "".join(
        f'<span style="background:{config.ENTITY_COLORS[kind]};color:#fff;'
        f'padding:2px 8px;border-radius:4px;margin-right:8px;font-size:0.8rem">'
        f'{config.ENTITY_LABELS[kind]}</span>'
        for kind in config.ENTITY_TYPES
    )
    return f'<div style="margin-bottom:10px">{badges}</div>'
def render_entities_html(text: str, entities: list[dict],
                         shade_by_conf: bool = False) -> str:
    """Return ``text`` as HTML with entity spans wrapped in coloured ``<mark>`` tags.

    Entities whose ``type`` is not a key of ``config.ENTITY_COLORS`` are
    ignored. The rest are processed in ``start`` order, and an entity that
    starts before the previously rendered one ended is skipped.

    When ``shade_by_conf`` is set, each mark's background is taken from the
    confidence colour ramp (contrast-stretched between the lowest and highest
    confidence among the known-type entities) instead of the entity-type
    colour, and the confidence % is added to the tooltip.

    Args:
        text: Source text; entity ``start``/``end`` offsets index into it.
        entities: Dicts with ``type``, ``start``, ``end`` and an optional
            ``conf`` (treated as 1.0 when absent).
        shade_by_conf: Colour by confidence rather than by entity type.

    Returns:
        An HTML string: a legend followed by a scrollable box holding the
        escaped text with newlines turned into ``<br>``.
    """
    ents = sorted((e for e in entities if e["type"] in config.ENTITY_COLORS),
                  key=lambda e: e["start"])
    cvals = [e.get("conf", 1.0) for e in ents] or [1.0]
    lo, hi = min(cvals), max(cvals)
    out, cursor = [], 0
    for e in ents:
        if e["start"] < cursor:  # skip any overlap defensively
            continue
        out.append(html.escape(text[cursor:e["start"]]))
        # The label lands in both an attribute value and element content, so
        # escape it once here (html.escape also escapes quotes by default).
        label = html.escape(config.ENTITY_LABELS[e["type"]])
        conf = e.get("conf", 1.0)
        if shade_by_conf:
            bg = _conf_color(_stretch(conf, lo, hi))
            title = f"{label} · {conf:.0%} confidence"
        else:
            bg = config.ENTITY_COLORS[e["type"]]
            title = label
        out.append(
            f'<mark style="background:{bg};color:#fff;padding:1px 4px;'
            f'border-radius:4px" title="{title}">'
            f'{html.escape(text[e["start"]:e["end"]])}'
            f'<sub style="font-size:0.6em;opacity:.85"> {label}</sub></mark>'
        )
        cursor = e["end"]
    out.append(html.escape(text[cursor:]))
    body = "".join(out).replace("\n", "<br>")
    return ((_conf_legend(lo, hi) if shade_by_conf else _legend()) +
            f'<div style="line-height:2.1;font-family:system-ui;font-size:0.95rem;'
            f'border:1px solid #ddd;border-radius:8px;padding:16px;'
            f'max-height:520px;overflow:auto">{body}</div>')
def render_tokens_html(tokens: list[dict], limit: int = 400,
                       shade_by_conf: bool = False) -> str:
    """Render sub-word tokens as HTML chips — the 'tokenization view'.

    Only the first ``limit`` tokens are drawn; when more exist, a trailing
    note reports how many were left out.

    By default a chip takes its colour from ``config.ENTITY_COLORS`` when the
    token's ``type`` is listed there, and is grey otherwise. When
    ``shade_by_conf`` is set, every chip is instead coloured on the confidence
    ramp, contrast-stretched between the lowest and highest confidence among
    the shown tokens. The label and confidence % always show on hover.

    Args:
        tokens: Dicts with ``text``, ``label`` and an optional ``conf``
            (treated as 1.0 when absent); ``type`` is read only when
            ``shade_by_conf`` is false.
        limit: Maximum number of chips to draw.
        shade_by_conf: Colour by confidence rather than by entity type.

    Returns:
        An HTML string: a legend followed by a scrollable box of chips.
    """
    shown = tokens[:limit]
    cvals = [t.get("conf", 1.0) for t in shown] or [1.0]
    lo, hi = min(cvals), max(cvals)
    chips = []
    for t in shown:
        txt = html.escape(t["text"]) or "·"  # placeholder for empty tokens
        conf = t.get("conf", 1.0)
        # The tooltip is written into an attribute value, so escape it like
        # the chip text (html.escape also escapes quotes by default).
        title = html.escape(f"{t['label']} · {conf:.0%}")
        if shade_by_conf:
            # Pure confidence heatmap: every token coloured on the ramp.
            style = f"background:{_conf_color(_stretch(conf, lo, hi))};color:#fff"
        elif t["type"] in config.ENTITY_COLORS:
            style = f"background:{config.ENTITY_COLORS[t['type']]};color:#fff"
        else:
            style = "background:#eee;color:#555"
        chips.append(
            f'<span title="{title}" style="{style};padding:2px 6px;border-radius:4px;'
            f'margin:2px;display:inline-block;font-family:monospace;font-size:0.8rem">{txt}</span>'
        )
    more = "" if len(tokens) <= limit else f'<div style="color:#888;margin-top:8px">… +{len(tokens)-limit} more tokens</div>'
    return ((_conf_legend(lo, hi) if shade_by_conf else _legend()) +
            f'<div style="border:1px solid #ddd;border-radius:8px;padding:12px;'
            f'max-height:420px;overflow:auto">{"".join(chips)}{more}</div>')
def wordcloud_figure(freq: dict, title: str = ""):
    """Return a matplotlib Figure showing a word cloud of ``freq``.

    The figure is built directly from ``matplotlib.figure.Figure`` rather than
    through ``pyplot``. It is therefore not registered with pyplot's global
    figure manager, and repeated calls do not pile up open figures that the
    caller would have to close.

    Args:
        freq: Mapping passed to ``WordCloud.generate_from_frequencies``.
        title: Optional axes title; omitted when empty.

    Returns:
        The Figure, or None when ``freq`` is empty or when ``wordcloud`` or
        ``matplotlib`` is not installed.
    """
    if not freq:
        return None
    try:
        from wordcloud import WordCloud
        from matplotlib.figure import Figure
    except ModuleNotFoundError:
        return None
    wc = WordCloud(width=900, height=400, background_color="white",
                   colormap="viridis", prefer_horizontal=0.9)
    wc.generate_from_frequencies(freq)
    # Standalone figure: no pyplot state is touched.
    fig = Figure(figsize=(9, 4))
    ax = fig.subplots()
    ax.imshow(wc, interpolation="bilinear")
    ax.axis("off")
    if title:
        ax.set_title(title)
    fig.tight_layout()
    return fig
def top_bar_figure(counter, title: str, color: str, top_n: int = 15):
    """Build a horizontal Plotly bar chart of the ``top_n`` most common items.

    ``counter`` must provide ``most_common(n)`` (e.g. ``collections.Counter``).
    The ranking is reversed before plotting, so the bars are passed to Plotly
    least-common first.

    Returns:
        The Plotly figure, or None when ``counter`` is empty or Plotly is not
        installed.
    """
    if not counter:
        return None

    try:
        import plotly.graph_objects as go
    except ModuleNotFoundError:
        return None

    ranked = list(reversed(counter.most_common(top_n)))
    names, counts = [], []
    for name, count in ranked:
        names.append(name)
        counts.append(count)

    bars = go.Bar(x=counts, y=names, orientation="h", marker_color=color)
    fig = go.Figure(bars)
    # Height grows with the number of bars, with a 300px floor.
    chart_height = max(300, 28 * len(ranked) + 80)
    fig.update_layout(title=title, height=chart_height,
                      margin={"l": 10, "r": 10, "t": 40, "b": 10})
    return fig