Spaces:

Zeqhx
/

Automated-CV-Parser

Running

App Files Files Community

Automated-CV-Parser / lib /viz.py

Zeqh

Confidence heatmap + drop v1 models from dropdown

b091c09 about 16 hours ago

raw

history blame contribute delete

6.92 kB

	"""Rendering helpers: entity-highlighted text, token chips, word cloud, charts."""
	from __future__ import annotations

	import html

	import config


	def _hex_to_rgba(hex_color: str, alpha: float) -> str:
	"""'#2a9d8f' + alpha -> 'rgba(42,157,143,0.83)'."""
	h = hex_color.lstrip("#")
	r, g, b = (int(h[i:i + 2], 16) for i in (0, 2, 4))
	return f"rgba({r},{g},{b},{alpha:.2f})"


	# --- Confidence heatmap ramp: low (red) -> mid (orange) -> high (teal) -------
	_RAMP_LO = (214, 40, 40) # low confidence
	_RAMP_MID = (244, 162, 97) # mid
	_RAMP_HI = (42, 157, 143) # high confidence


	def _lerp(a: tuple, b: tuple, t: float) -> tuple:
	return tuple(round(a[i] + (b[i] - a[i]) * t) for i in range(3))


	def _conf_color(t: float) -> str:
	    """Map a 0..1 value onto the red -> orange -> teal ramp as ``rgb(r,g,b)``.

	    Values outside 0..1 are clamped to the nearest end of the ramp.
	    """
	    clamped = max(0.0, min(1.0, t))
	    if clamped < 0.5:
	        # Lower half: blend from the low stop towards the mid stop.
	        red, green, blue = _lerp(_RAMP_LO, _RAMP_MID, clamped / 0.5)
	    else:
	        # Upper half: blend from the mid stop towards the high stop.
	        red, green, blue = _lerp(_RAMP_MID, _RAMP_HI, (clamped - 0.5) / 0.5)
	    return f"rgb({red},{green},{blue})"


	def _stretch(conf: float, lo: float, hi: float) -> float:
	"""Contrast-stretch a confidence into 0..1 across the observed [lo, hi]."""
	if hi - lo < 1e-6:
	return 1.0
	return (conf - lo) / (hi - lo)


	def _conf_legend(lo: float, hi: float) -> str:
	    """Build the heatmap legend: a gradient swatch labelled with ``lo``/``hi``.

	    ``lo`` and ``hi`` are formatted as whole percentages on either side of a
	    bar that runs through the three ramp colour stops.
	    """
	    stops = ", ".join(f"rgb{stop}" for stop in (_RAMP_LO, _RAMP_MID, _RAMP_HI))
	    grad = f"linear-gradient(to right, {stops})"
	    parts = [
	        '<div style="margin-bottom:10px;font-size:0.8rem;color:#444">',
	        f'<span style="margin-right:8px">low ({lo:.0%})</span>',
	        f'<span style="display:inline-block;width:180px;height:12px;background:{grad};',
	        'border-radius:3px;vertical-align:middle"></span>',
	        f'<span style="margin-left:8px">high ({hi:.0%}) — colour stretched across this CV</span>',
	        '</div>',
	    ]
	    return "".join(parts)


	def _legend() -> str:
	    """Build the entity-type legend: one coloured chip per ``config.ENTITY_TYPES`` entry.

	    Each chip uses the type's colour from ``config.ENTITY_COLORS`` and its
	    display text from ``config.ENTITY_LABELS``.
	    """
	    chips = "".join(
	        f'<span style="background:{config.ENTITY_COLORS[kind]};color:#fff;'
	        f'padding:2px 8px;border-radius:4px;margin-right:8px;font-size:0.8rem">'
	        f'{config.ENTITY_LABELS[kind]}</span>'
	        for kind in config.ENTITY_TYPES
	    )
	    return '<div style="margin-bottom:10px">' + chips + "</div>"


	def render_entities_html(text: str, entities: list[dict],
	                         shade_by_conf: bool = False) -> str:
	    """Render ``text`` as HTML with entity spans wrapped in coloured ``<mark>``s.

	    Entities whose ``type`` is not in ``config.ENTITY_COLORS`` are ignored, and
	    an entity that starts before the previously rendered one ends is dropped.

	    When ``shade_by_conf`` is set, each mark's background comes from the
	    confidence colour ramp, contrast-stretched across the marks that are
	    actually rendered; the confidence % is shown in the tooltip and the
	    confidence legend replaces the entity-type legend.

	    Args:
	        text: The original document text.
	        entities: Dicts with ``type``, ``start``, ``end`` and an optional
	            ``conf`` (treated as 1.0 when missing).
	        shade_by_conf: Colour marks by confidence instead of by entity type.

	    Returns:
	        An HTML fragment: a legend followed by the scrollable annotated text.
	    """
	    candidates = sorted((e for e in entities if e["type"] in config.ENTITY_COLORS),
	                        key=lambda e: e["start"])
	    # Resolve overlaps first so the confidence range below only covers marks
	    # that are really drawn (dropped entities must not skew the legend).
	    ents, cursor = [], 0
	    for e in candidates:
	        if e["start"] < cursor:  # skip any overlap defensively
	            continue
	        ents.append(e)
	        cursor = e["end"]
	    cvals = [e.get("conf", 1.0) for e in ents] or [1.0]
	    lo, hi = min(cvals), max(cvals)
	    out, cursor = [], 0
	    for e in ents:
	        out.append(html.escape(text[cursor:e["start"]]))
	        hex_color = config.ENTITY_COLORS[e["type"]]
	        # Escaped because it is placed in both an attribute and element content.
	        label = html.escape(config.ENTITY_LABELS[e["type"]])
	        conf = e.get("conf", 1.0)
	        if shade_by_conf:
	            bg = _conf_color(_stretch(conf, lo, hi))
	            title = f"{label} · {conf:.0%} confidence"
	        else:
	            bg = hex_color
	            title = label
	        out.append(
	            f'<mark style="background:{bg};color:#fff;padding:1px 4px;'
	            f'border-radius:4px" title="{title}">'
	            f'{html.escape(text[e["start"]:e["end"]])}'
	            f'<sub style="font-size:0.6em;opacity:.85"> {label}</sub></mark>'
	        )
	        cursor = e["end"]
	    out.append(html.escape(text[cursor:]))
	    # Newlines are converted only after escaping, so the <br> tags survive.
	    body = "".join(out).replace("\n", "<br>")
	    return ((_conf_legend(lo, hi) if shade_by_conf else _legend()) +
	            f'<div style="line-height:2.1;font-family:system-ui;font-size:0.95rem;'
	            f'border:1px solid #ddd;border-radius:8px;padding:16px;'
	            f'max-height:520px;overflow:auto">{body}</div>')


	def render_tokens_html(tokens: list[dict], limit: int = 400,
	                       shade_by_conf: bool = False) -> str:
	    """Render sub-word tokens as chips, the 'tokenization view'.

	    Only the first ``limit`` tokens are drawn; a trailing note reports how
	    many were left out. Every chip's tooltip shows its label and confidence %.

	    By default a chip takes its colour from ``config.ENTITY_COLORS`` when the
	    token's ``type`` is listed there, and a neutral grey otherwise. When
	    ``shade_by_conf`` is set, every chip is instead coloured on the
	    confidence ramp, contrast-stretched across the shown tokens, and the
	    confidence legend replaces the entity-type legend.

	    Args:
	        tokens: Dicts with ``text``, ``label``, ``type`` and an optional
	            ``conf`` (treated as 1.0 when missing).
	        limit: Maximum number of tokens to draw.
	        shade_by_conf: Colour chips by confidence instead of by entity type.

	    Returns:
	        An HTML fragment: a legend followed by the scrollable chip list.
	    """
	    shown = tokens[:limit]
	    cvals = [t.get("conf", 1.0) for t in shown] or [1.0]
	    lo, hi = min(cvals), max(cvals)
	    chips = []
	    for t in shown:
	        # An empty token would give an invisible chip, so show a placeholder dot.
	        txt = html.escape(t["text"]) or "·"
	        conf = t.get("conf", 1.0)
	        # Escaped because the label ends up inside a double-quoted attribute.
	        title = html.escape(f"{t['label']} · {conf:.0%}")
	        if shade_by_conf:
	            # Pure confidence heatmap: every token coloured on the ramp.
	            style = f"background:{_conf_color(_stretch(conf, lo, hi))};color:#fff"
	        elif t["type"] in config.ENTITY_COLORS:
	            style = f"background:{config.ENTITY_COLORS[t['type']]};color:#fff"
	        else:
	            style = "background:#eee;color:#555"
	        chips.append(
	            f'<span title="{title}" style="{style};padding:2px 6px;border-radius:4px;'
	            f'margin:2px;display:inline-block;font-family:monospace;font-size:0.8rem">{txt}</span>'
	        )
	    more = "" if len(tokens) <= limit else f'<div style="color:#888;margin-top:8px">… +{len(tokens)-limit} more tokens</div>'
	    return ((_conf_legend(lo, hi) if shade_by_conf else _legend()) +
	            f'<div style="border:1px solid #ddd;border-radius:8px;padding:12px;'
	            f'max-height:420px;overflow:auto">{"".join(chips)}{more}</div>')


	def wordcloud_figure(freq: dict, title: str = ""):
	    """Return a matplotlib Figure showing a word cloud for a frequency dict.

	    The figure is built with ``matplotlib.figure.Figure`` directly rather than
	    through ``pyplot``: pyplot keeps a global reference to every figure it
	    creates until that figure is explicitly closed, and nothing here closes it.

	    Args:
	        freq: Frequencies passed to ``WordCloud.generate_from_frequencies``.
	        title: Optional axes title; omitted when empty.

	    Returns:
	        The Figure, or ``None`` if ``freq`` is empty or ``wordcloud`` /
	        ``matplotlib`` is not installed.
	    """
	    if not freq:
	        return None
	    try:
	        from wordcloud import WordCloud
	        from matplotlib.figure import Figure
	    except ModuleNotFoundError:
	        return None
	    wc = WordCloud(width=900, height=400, background_color="white",
	                   colormap="viridis", prefer_horizontal=0.9)
	    wc.generate_from_frequencies(freq)
	    # Not registered with pyplot, so the figure is freed once callers drop it.
	    fig = Figure(figsize=(9, 4))
	    ax = fig.subplots()
	    ax.imshow(wc, interpolation="bilinear")
	    ax.axis("off")
	    if title:
	        ax.set_title(title)
	    fig.tight_layout()
	    return fig


	def top_bar_figure(counter, title: str, color: str, top_n: int = 15):
	    """Build a horizontal Plotly bar chart of the ``top_n`` most common items.

	    ``counter`` must provide ``most_common`` (e.g. ``collections.Counter``).
	    Returns ``None`` when ``counter`` is empty or Plotly is not installed.
	    """
	    if not counter:
	        return None
	    try:
	        import plotly.graph_objects as go
	    except ModuleNotFoundError:
	        return None
	    # most_common is descending; the order is reversed before plotting,
	    # presumably so the largest bar is drawn at the top -- TODO confirm.
	    ranked = counter.most_common(top_n)
	    ranked.reverse()
	    labels, values = [], []
	    for name, count in ranked:
	        labels.append(name)
	        values.append(count)
	    bars = go.Bar(x=values, y=labels, orientation="h", marker_color=color)
	    fig = go.Figure(bars)
	    # Height grows with the number of bars but never drops below 300 px.
	    chart_height = max(300, 28 * len(ranked) + 80)
	    fig.update_layout(title=title, height=chart_height,
	                      margin={"l": 10, "r": 10, "t": 40, "b": 10})
	    return fig