Spaces:

iitolstykh
/

GigaCheck

Running on Zero

File size: 5,365 Bytes

7c51531

"""HTML rendering helpers for classifier and detector results."""

from __future__ import annotations

import html

from validation import CONFIG

# One detector prediction as a ``(start, end, score)`` triple. The helpers in
# this module slice the source text with ``text[start:end]`` (so ``end`` is
# exclusive) and treat ``score`` as a confidence value.
AiInterval = tuple[int, int, float]


def prettify_label(label: str) -> str:
    """Turn a raw model label into the text shown to the user.

    The lookup is case-insensitive for the known labels ``ai``, ``human`` and
    ``mixed``. Any other label is returned via ``str.capitalize`` (first
    character upper-cased, the rest lower-cased).

    Args:
        label: Raw label string, e.g. ``"ai"`` or ``"human"``.

    Returns:
        ``"AI"``, ``"Human"`` or ``"Mixed"`` for known labels, otherwise the
        capitalized input.
    """
    display_names = {"ai": "AI", "human": "Human", "mixed": "Mixed"}
    key = label.lower()
    if key in display_names:
        return display_names[key]
    # Unknown label: fall back to a generic capitalization of the raw value.
    return label.capitalize()


def build_classifier_card(label: str, p_human: float, p_ai: float) -> str:
    """Render the classifier result as a labeled split probability bar.

    The human share is ``p_human`` clamped to ``[0, 1]`` and rounded to a whole
    percent; the AI share is its complement, so the two bar segments always
    add up to exactly 100%.

    Args:
        label: Predicted raw label (``"ai"`` or ``"human"``).
        p_human: Probability that the text is human-written, in ``[0, 1]``.
            Values outside that range are clamped before rendering.
        p_ai: Probability that the text is AI-generated, in ``[0, 1]``.
            Accepted for interface symmetry but not read: the AI percentage
            is derived as ``100 - human_pct``.

    Returns:
        An HTML string with the predicted label and a human/AI split bar.
    """
    # Clamp first so an out-of-range probability cannot produce a negative or
    # >100% CSS width (same clamping form as ``score_to_alpha``).
    human_share = max(0.0, min(1.0, p_human))
    human_pct = round(human_share * 100)
    ai_pct = 100 - human_pct
    verdict = prettify_label(label)
    verdict_color = CONFIG.ai_color if verdict == "AI" else CONFIG.human_color
    # Labels that are not in the known mapping reach this point as the raw
    # (capitalized) string, so escape before embedding them in markup.
    verdict_html = html.escape(verdict)
    return f"""
<div class="gc-card">
  <div class="gc-verdict">
    Predicted: <span style="color:{verdict_color}">{verdict_html}</span>
  </div>
  <div class="gc-bar">
    <div class="gc-bar-human" style="width:{human_pct}%"></div>
    <div class="gc-bar-ai" style="width:{ai_pct}%"></div>
  </div>
  <div class="gc-bar-legend">
    <span class="gc-legend-human">Human · {human_pct}%</span>
    <span class="gc-legend-ai">AI · {ai_pct}%</span>
  </div>
</div>
"""


def merge_intervals(intervals: list[AiInterval], text_len: int) -> list[AiInterval]:
    """Flatten raw AI intervals into disjoint segments with the max score.

    Every interval is first clipped to ``[0, text_len]``; intervals that are
    empty after clipping are dropped. The remaining start/end points split the
    text into elementary pieces, each piece takes the highest score of the
    intervals that fully cover it, and adjacent pieces with an equal score are
    joined into one segment.

    Args:
        intervals: Raw ``(start, end, score)`` predictions.
        text_len: Length of the source text, used to clip the bounds.

    Returns:
        Sorted, non-overlapping ``(start, end, score)`` segments.
    """
    bounded: list[AiInterval] = []
    for raw_start, raw_end, raw_score in intervals:
        lo = max(0, raw_start)
        hi = min(text_len, raw_end)
        if hi > lo:
            bounded.append((lo, hi, raw_score))
    if not bounded:
        return []

    # Every distinct start/end is a point where the set of covering
    # intervals may change.
    cut_points: set[int] = set()
    for lo, hi, _ in bounded:
        cut_points.add(lo)
        cut_points.add(hi)
    ordered = sorted(cut_points)

    merged: list[AiInterval] = []
    for left, right in zip(ordered[:-1], ordered[1:]):
        scores_here = [
            score for lo, hi, score in bounded if lo <= left and right <= hi
        ]
        if scores_here:
            best = max(scores_here)
            last = merged[-1] if merged else None
            if last is not None and last[1] == left and last[2] == best:
                # Same score and directly adjacent: extend the previous segment.
                merged[-1] = (last[0], right, last[2])
            else:
                merged.append((left, right, best))
        # Pieces that no interval covers are gaps and produce no segment.
    return merged


def score_to_alpha(score: float) -> float:
    """Convert a confidence score into a background opacity.

    The score is clamped to ``[0, 1]`` and mapped linearly onto
    ``[0.15, 1.0]``, then rounded to three decimal places.

    Args:
        score: Confidence score, expected in ``[0, 1]``.

    Returns:
        An opacity in ``[0.15, 1.0]`` so even low scores stay visible.
    """
    min_alpha = 0.15
    alpha_range = 0.85
    capped = min(1.0, score)
    clamped = max(0.0, capped)
    return round(min_alpha + alpha_range * clamped, 3)


def build_highlighted_text(text: str, intervals: list[AiInterval]) -> str:
    """Produce HTML for ``text`` with AI segments wrapped in tinted spans.

    The intervals are merged with ``merge_intervals`` first. Text outside the
    merged segments is HTML-escaped and emitted as-is; text inside a segment
    is escaped and wrapped in a ``<span>`` whose background opacity comes
    from ``score_to_alpha`` and whose tooltip shows the score.

    Args:
        text: The source text exactly as passed to the detector.
        intervals: Raw ``(start, end, score)`` predictions.

    Returns:
        An HTML string with AI spans wrapped in colored ``<span>`` elements.
    """
    pieces: list[str] = []
    pos = 0  # index of the first character not yet emitted
    for seg_start, seg_end, seg_score in merge_intervals(intervals, len(text)):
        if pos < seg_start:
            # Plain text between the previous segment and this one.
            pieces.append(html.escape(text[pos:seg_start]))
        opacity = score_to_alpha(seg_score)
        escaped = html.escape(text[seg_start:seg_end])
        pieces.append(
            '<span class="gc-ai-span" '
            f'style="background-color: rgba(229, 83, 60, {opacity})" '
            f'title="AI score: {seg_score:.2f}">{escaped}</span>'
        )
        pos = seg_end
    tail = text[pos:]
    if tail:
        pieces.append(html.escape(tail))
    return "".join(pieces)


def build_detector_card(text: str, intervals: list[AiInterval]) -> str:
    """Build the detector card: a one-line summary plus the highlighted text.

    The summary reports how many merged AI segments were found (as counted by
    ``merge_intervals``), or states that none were found. The text body is
    rendered by ``build_highlighted_text``.

    Args:
        text: The source text exactly as passed to the detector.
        intervals: Raw ``(start, end, score)`` predictions.

    Returns:
        An HTML string containing a summary line and the highlighted text.
    """
    fragment_count = len(merge_intervals(intervals, len(text)))
    if fragment_count == 0:
        summary_class = "gc-summary gc-summary-clean"
        summary = "No AI-written fragments detected above the threshold."
    else:
        summary_class = "gc-summary gc-summary-ai"
        summary = (
            f"{fragment_count} AI-written fragment(s) detected — "
            "darker red means higher confidence."
        )
    body = build_highlighted_text(text, intervals)
    return (
        "\n"
        '<div class="gc-card">\n'
        f'  <div class="{summary_class}">{summary}</div>\n'
        f'  <div class="gc-text">{body}</div>\n'
        "</div>\n"
    )