llm-completion-playground / completion_html.py
jvamvas's picture
Initial commit
352de18
"""Build HTML for completion view with per-token probability styling."""
from __future__ import annotations
import base64
import html
import json
import math
from typing import Any
def _lerp_byte(low: int, high: int, weight: float) -> int:
    """Blend two colour-channel values linearly and round to an integer.

    A ``weight`` of 0 yields ``low`` and a ``weight`` of 1 yields ``high``.
    The weight is not clamped here. Exact halves are rounded the way Python's
    built-in ``round`` does it (ties go to the even integer).
    """
    span = high - low
    blended = low + span * weight
    return int(round(blended))
# Colour ramp used for token backgrounds: the light end of the
# matplotlib / ColorBrewer sequential "Blues" palette, ordered from lightest
# to darkest. Only the lighter stops are kept so that black text stays
# readable on every tint.
_SEQUENTIAL_BLUES_STOPS: list[tuple[int, int, int]] = [
    (247, 251, 255),  # #f7fbff
    (222, 235, 247),  # #deebf7
    (198, 219, 239),  # #c6dbef
    (158, 202, 225),  # #9ecae1
    (107, 174, 214),  # #6baed6
]
def _interpolate_sequential_stops(
    stops: list[tuple[int, int, int]],
    weight: float,
) -> tuple[int, int, int]:
    """Pick a colour on a ramp by piecewise linear interpolation.

    The stops are treated as evenly spaced along [0, 1]. ``weight`` is clamped
    to that interval before the lookup. A ramp with a single stop returns that
    stop unchanged.
    """
    if len(stops) == 1:
        return stops[0]

    clamped = max(0.0, min(1.0, weight))
    last_segment = len(stops) - 2
    position = clamped * (last_segment + 1)
    # A weight of exactly 1.0 lands on the final stop; fold it into the last
    # segment so that `segment + 1` stays a valid index.
    segment = min(int(math.floor(position)), last_segment)
    offset = position - segment

    start, end = stops[segment], stops[segment + 1]
    red = _lerp_byte(start[0], end[0], offset)
    green = _lerp_byte(start[1], end[1], offset)
    blue = _lerp_byte(start[2], end[2], offset)
    return (red, green, blue)
def probability_to_css_background(probability: float) -> str:
    """Return a CSS ``rgb(...)`` background colour for a token probability.

    The probability is clamped to [0, 1] and mapped onto the light Blues ramp,
    so higher probabilities give a deeper blue. A NaN probability is treated
    as 0 and therefore gets the lightest tint.
    """
    weight = 0.0 if math.isnan(probability) else max(0.0, min(1.0, float(probability)))
    channels = _interpolate_sequential_stops(_SEQUENTIAL_BLUES_STOPS, weight)
    red, green, blue = channels
    return f"rgb({red},{green},{blue})"
def _replace_non_finite(value: Any) -> Any:
    """Return a copy of ``value`` with NaN/±inf floats replaced by ``None``.

    Dicts, lists and tuples are walked recursively (tuples come back as lists,
    which is how JSON serialises them anyway); every other value is returned
    unchanged.
    """
    if isinstance(value, float):
        return value if math.isfinite(value) else None
    if isinstance(value, dict):
        return {key: _replace_non_finite(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_replace_non_finite(item) for item in value]
    return value


def _encode_tooltip_payload(
    alternatives: list[dict[str, Any]],
    sampled_token_text: str,
    sampled_probability: float,
    chosen_in_top5: bool,
) -> str:
    """Serialise the tooltip data to base64-encoded JSON for a data attribute.

    Args:
        alternatives: Candidate tokens to list in the tooltip, as dicts.
        sampled_token_text: Text of the token that was actually sampled.
        sampled_probability: Probability of the sampled token.
        chosen_in_top5: Whether the sampled token is among ``alternatives``.

    Returns:
        ASCII base64 text wrapping a JSON object with the keys
        ``alternatives``, ``sampled_token`` and ``chosen_in_top5``.

    Non-finite floats (NaN, ±inf) are written as JSON ``null``. By default
    ``json.dumps`` would emit bare ``NaN`` / ``Infinity`` tokens, which are not
    valid JSON and make strict parsers reject the entire payload.
    """
    document = _replace_non_finite(
        {
            "alternatives": alternatives,
            "sampled_token": {
                "token_text": sampled_token_text,
                "probability": sampled_probability,
            },
            "chosen_in_top5": chosen_in_top5,
        }
    )
    payload = json.dumps(document, ensure_ascii=True)
    return base64.b64encode(payload.encode("utf-8")).decode("ascii")
def build_completion_html(
prompt_text: str,
token_display_strings: list[str],
chosen_probabilities: list[float],
top5_alternatives: list[list[dict[str, Any]]],
chosen_in_top5_flags: list[bool],
) -> str:
"""
Build a single div with escaped prompt text and per-token spans for the completion.
Each entry in top5_alternatives is up to five dicts with keys: token_text, probability.
chosen_in_top5_flags indicates whether the sampled token appears in that top-5 list.
"""
if len(token_display_strings) != len(chosen_probabilities):
raise ValueError("token_display_strings and chosen_probabilities length mismatch")
if len(token_display_strings) != len(top5_alternatives):
raise ValueError("token_display_strings and top5_alternatives length mismatch")
if len(token_display_strings) != len(chosen_in_top5_flags):
raise ValueError("token_display_strings and chosen_in_top5_flags length mismatch")
escaped_prompt = html.escape(prompt_text)
parts: list[str] = [
'<div class="completion-playground-root" style="white-space: pre-wrap; word-break: break-word;">',
"<style>"
".completion-playground-root .completion-token{"
"display:inline-block;vertical-align:baseline;"
"}</style>",
escaped_prompt,
]
for display_text, probability, alternatives, chosen_in_top5 in zip(
token_display_strings,
chosen_probabilities,
top5_alternatives,
chosen_in_top5_flags,
strict=True,
):
background = probability_to_css_background(probability)
payload = _encode_tooltip_payload(
alternatives,
display_text,
probability,
chosen_in_top5,
)
escaped_inner = html.escape(display_text)
parts.append(
f'<span class="completion-token" style="background-color:{background};cursor:pointer;" '
f'data-top5="{html.escape(payload, quote=True)}">{escaped_inner}</span>'
)
parts.append("</div>")
return "".join(parts)