Spaces:

ZurichNLP
/

llm-completion-playground

Running

File size: 4,530 Bytes

352de18

"""Build HTML for completion view with per-token probability styling."""

from __future__ import annotations

import base64
import html
import json
import math
from typing import Any


def _lerp_byte(low: int, high: int, weight: float) -> int:
    return int(round(low + (high - low) * weight))


# Colour stops from the matplotlib / ColorBrewer sequential "Blues" ramp, listed
# from lightest to darkest as (red, green, blue) byte triples. Only the lighter
# range is used so that black text keeps enough contrast on any tint.
_SEQUENTIAL_BLUES_STOPS: list[tuple[int, int, int]] = [
    (247, 251, 255),  # #f7fbff
    (222, 235, 247),  # #deebf7
    (198, 219, 239),  # #c6dbef
    (158, 202, 225),  # #9ecae1
    (107, 174, 214),  # #6baed6
]


def _interpolate_sequential_stops(
    stops: list[tuple[int, int, int]],
    weight: float,
) -> tuple[int, int, int]:
    """Return the RGB colour at position `weight` along a piecewise-linear ramp.

    `stops` are evenly spaced along [0, 1]: the first stop sits at 0 and the
    last at 1. `weight` is clamped to that interval; a NaN weight is treated
    as 0 so that an undefined value maps to the first stop instead of the last.
    A single-stop ramp returns that stop regardless of `weight`.

    Raises ValueError if `stops` is empty.
    """
    if not stops:
        raise ValueError("stops must contain at least one colour")
    if len(stops) == 1:
        return stops[0]
    if math.isnan(weight):
        # max(0.0, min(1.0, nan)) evaluates to 1.0 because every comparison
        # with NaN is false; handle NaN explicitly so it lands on the first stop.
        weight = 0.0
    else:
        weight = max(0.0, min(1.0, weight))
    segment_count = len(stops) - 1
    scaled = weight * segment_count
    # Clamp the index so weight == 1.0 falls on the last segment with
    # fraction 1.0 rather than indexing past the final stop.
    segment_index = min(int(math.floor(scaled)), segment_count - 1)
    fraction = scaled - segment_index
    low = stops[segment_index]
    high = stops[segment_index + 1]
    return (
        _lerp_byte(low[0], high[0], fraction),
        _lerp_byte(low[1], high[1], fraction),
        _lerp_byte(low[2], high[2], fraction),
    )


def probability_to_css_background(probability: float) -> str:
    """
    Map a probability to a CSS `rgb(r,g,b)` background colour.

    The tint varies linearly with **probability** along the light band of the
    sequential Blues stops, so black text stays readable. Values outside
    [0, 1] are clamped, and NaN is treated as 0.
    """
    weight = (
        0.0
        if math.isnan(probability)
        else max(0.0, min(1.0, float(probability)))
    )
    channels = _interpolate_sequential_stops(_SEQUENTIAL_BLUES_STOPS, weight)
    red, green, blue = channels
    return f"rgb({red},{green},{blue})"


def _encode_tooltip_payload(
    alternatives: list[dict[str, Any]],
    sampled_token_text: str,
    sampled_probability: float,
    chosen_in_top5: bool,
) -> str:
    """Base64 JSON for safe use in a data attribute."""
    payload = json.dumps(
        {
            "alternatives": alternatives,
            "sampled_token": {
                "token_text": sampled_token_text,
                "probability": sampled_probability,
            },
            "chosen_in_top5": chosen_in_top5,
        },
        ensure_ascii=True,
    )
    return base64.b64encode(payload.encode("utf-8")).decode("ascii")


def build_completion_html(
    prompt_text: str,
    token_display_strings: list[str],
    chosen_probabilities: list[float],
    top5_alternatives: list[list[dict[str, Any]]],
    chosen_in_top5_flags: list[bool],
) -> str:
    """
    Render the prompt and its completion tokens as one HTML div.

    The prompt is HTML-escaped and emitted as plain text. Each completion token
    becomes a span whose background colour reflects its probability and whose
    `data-top5` attribute carries the encoded tooltip payload.

    Each entry in top5_alternatives is up to five dicts with keys: token_text,
    probability. chosen_in_top5_flags indicates whether the sampled token
    appears in that top-5 list.

    Raises ValueError when the four per-token lists differ in length.
    """
    expected_length = len(token_display_strings)
    # Checked in this order so the first mismatching list determines the message.
    parallel_inputs = (
        (
            chosen_probabilities,
            "token_display_strings and chosen_probabilities length mismatch",
        ),
        (
            top5_alternatives,
            "token_display_strings and top5_alternatives length mismatch",
        ),
        (
            chosen_in_top5_flags,
            "token_display_strings and chosen_in_top5_flags length mismatch",
        ),
    )
    for sequence, message in parallel_inputs:
        if len(sequence) != expected_length:
            raise ValueError(message)

    # Root element plus a scoped style rule for the token spans.
    opening = (
        '<div class="completion-playground-root" '
        'style="white-space: pre-wrap; word-break: break-word;">'
        "<style>"
        ".completion-playground-root .completion-token{"
        "display:inline-block;vertical-align:baseline;"
        "}</style>"
    )
    fragments: list[str] = [opening, html.escape(prompt_text)]

    per_token = zip(
        token_display_strings,
        chosen_probabilities,
        top5_alternatives,
        chosen_in_top5_flags,
        strict=True,
    )
    for token_text, token_probability, candidates, in_top5 in per_token:
        tint = probability_to_css_background(token_probability)
        encoded = html.escape(
            _encode_tooltip_payload(candidates, token_text, token_probability, in_top5),
            quote=True,
        )
        fragments.append(
            '<span class="completion-token" '
            f'style="background-color:{tint};cursor:pointer;" '
            f'data-top5="{encoded}">{html.escape(token_text)}</span>'
        )

    fragments.append("</div>")
    return "".join(fragments)