"""Build HTML for completion view with per-token probability styling.""" from __future__ import annotations import base64 import html import json import math from typing import Any def _lerp_byte(low: int, high: int, weight: float) -> int: return int(round(low + (high - low) * weight)) # matplotlib / ColorBrewer sequential "Blues" (lighter range; keeps contrast with black text) _SEQUENTIAL_BLUES_STOPS: list[tuple[int, int, int]] = [ (247, 251, 255), # #f7fbff (222, 235, 247), # #deebf7 (198, 219, 239), # #c6dbef (158, 202, 225), # #9ecae1 (107, 174, 214), # #6baed6 ] def _interpolate_sequential_stops( stops: list[tuple[int, int, int]], weight: float, ) -> tuple[int, int, int]: """Piecewise linear interpolation along `weight` in [0, 1].""" if len(stops) == 1: return stops[0] weight = max(0.0, min(1.0, weight)) segment_count = len(stops) - 1 scaled = weight * segment_count segment_index = int(math.floor(scaled)) segment_index = min(segment_index, segment_count - 1) fraction = scaled - segment_index low = stops[segment_index] high = stops[segment_index + 1] return ( _lerp_byte(low[0], high[0], fraction), _lerp_byte(low[1], high[1], fraction), _lerp_byte(low[2], high[2], fraction), ) def probability_to_css_background(probability: float) -> str: """ Background tint linear in **probability** along a typical sequential Blues colormap. Uses the light band of ColorBrewer / matplotlib Blues so black text stays readable. """ if math.isnan(probability): weight = 0.0 else: weight = max(0.0, min(1.0, float(probability))) red, green, blue = _interpolate_sequential_stops(_SEQUENTIAL_BLUES_STOPS, weight) return f"rgb({red},{green},{blue})" def _encode_tooltip_payload( alternatives: list[dict[str, Any]], sampled_token_text: str, sampled_probability: float, chosen_in_top5: bool, ) -> str: """Base64 JSON for safe use in a data attribute.""" payload = json.dumps( { "alternatives": alternatives, "sampled_token": { "token_text": sampled_token_text, "probability": sampled_probability, }, "chosen_in_top5": chosen_in_top5, }, ensure_ascii=True, ) return base64.b64encode(payload.encode("utf-8")).decode("ascii") def build_completion_html( prompt_text: str, token_display_strings: list[str], chosen_probabilities: list[float], top5_alternatives: list[list[dict[str, Any]]], chosen_in_top5_flags: list[bool], ) -> str: """ Build a single div with escaped prompt text and per-token spans for the completion. Each entry in top5_alternatives is up to five dicts with keys: token_text, probability. chosen_in_top5_flags indicates whether the sampled token appears in that top-5 list. """ if len(token_display_strings) != len(chosen_probabilities): raise ValueError("token_display_strings and chosen_probabilities length mismatch") if len(token_display_strings) != len(top5_alternatives): raise ValueError("token_display_strings and top5_alternatives length mismatch") if len(token_display_strings) != len(chosen_in_top5_flags): raise ValueError("token_display_strings and chosen_in_top5_flags length mismatch") escaped_prompt = html.escape(prompt_text) parts: list[str] = [ '
', "", escaped_prompt, ] for display_text, probability, alternatives, chosen_in_top5 in zip( token_display_strings, chosen_probabilities, top5_alternatives, chosen_in_top5_flags, strict=True, ): background = probability_to_css_background(probability) payload = _encode_tooltip_payload( alternatives, display_text, probability, chosen_in_top5, ) escaped_inner = html.escape(display_text) parts.append( f'{escaped_inner}' ) parts.append("
") return "".join(parts)