Spaces:

Blablablab
/

codebook

Paused

File size: 5,767 Bytes

aceb1b2

"""
Shared agent-trace normalization.

Both ``agent_trace`` (vertical step cards) and ``eval_trace`` (three-pane
reasoning | function calls | final answer) need to turn heterogeneous trace
data into a flat list of typed steps. This module is the single source of
truth for that parsing so the two displays never drift apart.

A normalized step is a dict with keys:
    type:       one of "thought" | "action" | "observation" | "system" | "error"
    speaker:    display label for the step (may be "")
    text:       the step's textual content
    timestamp:  optional timestamp string ("")
    screenshot: optional screenshot URL ("")

Supported input formats (see ``normalize_steps``):
    - a single string                       -> one observation step
    - list of strings                       -> one step each (type inferred)
    - list of {speaker, text} dicts         -> dialogue-style turns
    - list of {thought, action, observation}-> one dict expands to 1-3 steps
    - list of {step_type, content} dicts    -> explicit typing
"""

import re
from typing import Any, Dict, List


# Default background colors for step types (consumed by displays' CSS builders).
DEFAULT_STEP_COLORS = {
    "thought": "#e8f4fd",
    "action": "#fff3e0",
    "observation": "#e8f5e9",
    "system": "#f3e5f5",
    "error": "#ffebee",
}

# Speaker/label substrings that map to a step type.
SPEAKER_TYPE_PATTERNS = {
    "thought": re.compile(r"(thought|reasoning|planning|think)", re.IGNORECASE),
    "action": re.compile(r"(action|tool|function|call|execute)", re.IGNORECASE),
    "observation": re.compile(r"(observation|environment|result|output|response)", re.IGNORECASE),
    "system": re.compile(r"(system|info|metadata)", re.IGNORECASE),
    "error": re.compile(r"(error|fail|exception)", re.IGNORECASE),
}


def infer_type_from_speaker(speaker: str) -> str:
    """Infer a step type from a speaker/label string."""
    if not speaker:
        return "observation"
    for type_name, pattern in SPEAKER_TYPE_PATTERNS.items():
        if pattern.search(speaker):
            return type_name
    return "observation"


def infer_type_from_text(text: str) -> str:
    """Infer a step type from free text content."""
    lower = text.lower()
    if lower.startswith(("i need to", "i should", "let me think", "my plan")):
        return "thought"
    if "(" in text and ")" in text and any(c.isalpha() for c in text.split("(")[0]):
        return "action"
    return "observation"


def format_action_text(action: Any) -> str:
    """Render an action value as ``tool(args)`` when it is a structured dict."""
    if isinstance(action, dict):
        tool = action.get("tool", action.get("name", ""))
        params = action.get("params", action.get("parameters", {}))
        if params:
            args = ", ".join(f"{k}={repr(v)}" for k, v in params.items())
            return f"{tool}({args})"
        return f"{tool}()"
    return str(action)


def normalize_steps(
    data: Any,
    speaker_key: str = "speaker",
    text_key: str = "text",
) -> List[Dict[str, str]]:
    """Normalize heterogeneous trace data into a list of typed step dicts.

    See the module docstring for the accepted input formats and the shape of
    each returned step.
    """
    steps: List[Dict[str, str]] = []

    if isinstance(data, str):
        return [{"type": "observation", "speaker": "", "text": data}]

    if not isinstance(data, list):
        return steps

    for item in data:
        if isinstance(item, str):
            step_type = infer_type_from_text(item)
            steps.append({"type": step_type, "speaker": "", "text": item})
        elif isinstance(item, dict):
            # Format 1: speaker/text (same as dialogue)
            if speaker_key in item and text_key in item:
                speaker = item[speaker_key]
                text = item[text_key]
                step_type = item.get("step_type", infer_type_from_speaker(speaker))
                steps.append({
                    "type": step_type,
                    "speaker": speaker,
                    "text": text,
                    "timestamp": item.get("timestamp", ""),
                    "screenshot": item.get("screenshot", ""),
                })
            # Format 2: thought/action/observation (one dict = up to 3 steps)
            elif any(k in item for k in ("thought", "action", "observation")):
                if item.get("thought"):
                    steps.append({
                        "type": "thought",
                        "speaker": "Agent (Thought)",
                        "text": str(item["thought"]),
                        "timestamp": item.get("timestamp", ""),
                    })
                if item.get("action"):
                    steps.append({
                        "type": "action",
                        "speaker": "Agent (Action)",
                        "text": format_action_text(item["action"]),
                    })
                if item.get("observation"):
                    steps.append({
                        "type": "observation",
                        "speaker": "Environment",
                        "text": str(item["observation"]),
                        "screenshot": item.get("screenshot", ""),
                    })
            # Format 3: step_type/content
            elif "step_type" in item:
                steps.append({
                    "type": item["step_type"],
                    "speaker": item.get("speaker", item.get("step_type", "").capitalize()),
                    "text": item.get("content", item.get("text", "")),
                    "timestamp": item.get("timestamp", ""),
                    "screenshot": item.get("screenshot", ""),
                })

    return steps