Spaces:
Paused
Paused
File size: 5,767 Bytes
aceb1b2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 | """
Shared agent-trace normalization.
Both ``agent_trace`` (vertical step cards) and ``eval_trace`` (three-pane
reasoning | function calls | final answer) need to turn heterogeneous trace
data into a flat list of typed steps. This module is the single source of
truth for that parsing so the two displays never drift apart.
A normalized step is a dict with keys:
type: one of "thought" | "action" | "observation" | "system" | "error"
speaker: display label for the step (may be "")
text: the step's textual content
timestamp: optional timestamp string ("")
screenshot: optional screenshot URL ("")
Supported input formats (see ``normalize_steps``):
- a single string -> one observation step
- list of strings -> one step each (type inferred)
- list of {speaker, text} dicts -> dialogue-style turns
- list of {thought, action, observation}-> one dict expands to 1-3 steps
- list of {step_type, content} dicts -> explicit typing
"""
import re
from typing import Any, Dict, List
# Default background colors for step types (consumed by displays' CSS builders).
DEFAULT_STEP_COLORS = {
"thought": "#e8f4fd",
"action": "#fff3e0",
"observation": "#e8f5e9",
"system": "#f3e5f5",
"error": "#ffebee",
}
# Speaker/label substrings that map to a step type.
SPEAKER_TYPE_PATTERNS = {
"thought": re.compile(r"(thought|reasoning|planning|think)", re.IGNORECASE),
"action": re.compile(r"(action|tool|function|call|execute)", re.IGNORECASE),
"observation": re.compile(r"(observation|environment|result|output|response)", re.IGNORECASE),
"system": re.compile(r"(system|info|metadata)", re.IGNORECASE),
"error": re.compile(r"(error|fail|exception)", re.IGNORECASE),
}
def infer_type_from_speaker(speaker: str) -> str:
"""Infer a step type from a speaker/label string."""
if not speaker:
return "observation"
for type_name, pattern in SPEAKER_TYPE_PATTERNS.items():
if pattern.search(speaker):
return type_name
return "observation"
def infer_type_from_text(text: str) -> str:
"""Infer a step type from free text content."""
lower = text.lower()
if lower.startswith(("i need to", "i should", "let me think", "my plan")):
return "thought"
if "(" in text and ")" in text and any(c.isalpha() for c in text.split("(")[0]):
return "action"
return "observation"
def format_action_text(action: Any) -> str:
"""Render an action value as ``tool(args)`` when it is a structured dict."""
if isinstance(action, dict):
tool = action.get("tool", action.get("name", ""))
params = action.get("params", action.get("parameters", {}))
if params:
args = ", ".join(f"{k}={repr(v)}" for k, v in params.items())
return f"{tool}({args})"
return f"{tool}()"
return str(action)
def normalize_steps(
data: Any,
speaker_key: str = "speaker",
text_key: str = "text",
) -> List[Dict[str, str]]:
"""Normalize heterogeneous trace data into a list of typed step dicts.
See the module docstring for the accepted input formats and the shape of
each returned step.
"""
steps: List[Dict[str, str]] = []
if isinstance(data, str):
return [{"type": "observation", "speaker": "", "text": data}]
if not isinstance(data, list):
return steps
for item in data:
if isinstance(item, str):
step_type = infer_type_from_text(item)
steps.append({"type": step_type, "speaker": "", "text": item})
elif isinstance(item, dict):
# Format 1: speaker/text (same as dialogue)
if speaker_key in item and text_key in item:
speaker = item[speaker_key]
text = item[text_key]
step_type = item.get("step_type", infer_type_from_speaker(speaker))
steps.append({
"type": step_type,
"speaker": speaker,
"text": text,
"timestamp": item.get("timestamp", ""),
"screenshot": item.get("screenshot", ""),
})
# Format 2: thought/action/observation (one dict = up to 3 steps)
elif any(k in item for k in ("thought", "action", "observation")):
if item.get("thought"):
steps.append({
"type": "thought",
"speaker": "Agent (Thought)",
"text": str(item["thought"]),
"timestamp": item.get("timestamp", ""),
})
if item.get("action"):
steps.append({
"type": "action",
"speaker": "Agent (Action)",
"text": format_action_text(item["action"]),
})
if item.get("observation"):
steps.append({
"type": "observation",
"speaker": "Environment",
"text": str(item["observation"]),
"screenshot": item.get("screenshot", ""),
})
# Format 3: step_type/content
elif "step_type" in item:
steps.append({
"type": item["step_type"],
"speaker": item.get("speaker", item.get("step_type", "").capitalize()),
"text": item.get("content", item.get("text", "")),
"timestamp": item.get("timestamp", ""),
"screenshot": item.get("screenshot", ""),
})
return steps
|