Spaces:

Blablablab
/

codebook

Paused

App Files Files Community

codebook / potato /server_utils /displays /_trace_normalize.py

davidjurgens

Deploy: Potato — Codebook Annotation

aceb1b2 verified 6 days ago

Raw

History Blame Contribute Delete

5.77 kB

	"""
	Shared agent-trace normalization.

	Both ``agent_trace`` (vertical step cards) and ``eval_trace`` (three-pane
	reasoning \| function calls \| final answer) need to turn heterogeneous trace
	data into a flat list of typed steps. This module is the single source of
	truth for that parsing so the two displays never drift apart.

	A normalized step is a dict with keys:
	type: one of "thought" \| "action" \| "observation" \| "system" \| "error"
	speaker: display label for the step (may be "")
	text: the step's textual content
	timestamp: optional timestamp string ("")
	screenshot: optional screenshot URL ("")

	Supported input formats (see ``normalize_steps``):
	- a single string -> one observation step
	- list of strings -> one step each (type inferred)
	- list of {speaker, text} dicts -> dialogue-style turns
	- list of {thought, action, observation}-> one dict expands to 1-3 steps
	- list of {step_type, content} dicts -> explicit typing
	"""

	import re
	from typing import Any, Dict, List


	# Default background colors for step types (consumed by displays' CSS builders).
	DEFAULT_STEP_COLORS = {
	"thought": "#e8f4fd",
	"action": "#fff3e0",
	"observation": "#e8f5e9",
	"system": "#f3e5f5",
	"error": "#ffebee",
	}

	# Speaker/label substrings that map to a step type.
	SPEAKER_TYPE_PATTERNS = {
	"thought": re.compile(r"(thought\|reasoning\|planning\|think)", re.IGNORECASE),
	"action": re.compile(r"(action\|tool\|function\|call\|execute)", re.IGNORECASE),
	"observation": re.compile(r"(observation\|environment\|result\|output\|response)", re.IGNORECASE),
	"system": re.compile(r"(system\|info\|metadata)", re.IGNORECASE),
	"error": re.compile(r"(error\|fail\|exception)", re.IGNORECASE),
	}


	def infer_type_from_speaker(speaker: str) -> str:
	"""Infer a step type from a speaker/label string."""
	if not speaker:
	return "observation"
	for type_name, pattern in SPEAKER_TYPE_PATTERNS.items():
	if pattern.search(speaker):
	return type_name
	return "observation"


	def infer_type_from_text(text: str) -> str:
	"""Infer a step type from free text content."""
	lower = text.lower()
	if lower.startswith(("i need to", "i should", "let me think", "my plan")):
	return "thought"
	if "(" in text and ")" in text and any(c.isalpha() for c in text.split("(")[0]):
	return "action"
	return "observation"


	def format_action_text(action: Any) -> str:
	"""Render an action value as ``tool(args)`` when it is a structured dict."""
	if isinstance(action, dict):
	tool = action.get("tool", action.get("name", ""))
	params = action.get("params", action.get("parameters", {}))
	if params:
	args = ", ".join(f"{k}={repr(v)}" for k, v in params.items())
	return f"{tool}({args})"
	return f"{tool}()"
	return str(action)


	def normalize_steps(
	data: Any,
	speaker_key: str = "speaker",
	text_key: str = "text",
	) -> List[Dict[str, str]]:
	"""Normalize heterogeneous trace data into a list of typed step dicts.

	See the module docstring for the accepted input formats and the shape of
	each returned step.
	"""
	steps: List[Dict[str, str]] = []

	if isinstance(data, str):
	return [{"type": "observation", "speaker": "", "text": data}]

	if not isinstance(data, list):
	return steps

	for item in data:
	if isinstance(item, str):
	step_type = infer_type_from_text(item)
	steps.append({"type": step_type, "speaker": "", "text": item})
	elif isinstance(item, dict):
	# Format 1: speaker/text (same as dialogue)
	if speaker_key in item and text_key in item:
	speaker = item[speaker_key]
	text = item[text_key]
	step_type = item.get("step_type", infer_type_from_speaker(speaker))
	steps.append({
	"type": step_type,
	"speaker": speaker,
	"text": text,
	"timestamp": item.get("timestamp", ""),
	"screenshot": item.get("screenshot", ""),
	})
	# Format 2: thought/action/observation (one dict = up to 3 steps)
	elif any(k in item for k in ("thought", "action", "observation")):
	if item.get("thought"):
	steps.append({
	"type": "thought",
	"speaker": "Agent (Thought)",
	"text": str(item["thought"]),
	"timestamp": item.get("timestamp", ""),
	})
	if item.get("action"):
	steps.append({
	"type": "action",
	"speaker": "Agent (Action)",
	"text": format_action_text(item["action"]),
	})
	if item.get("observation"):
	steps.append({
	"type": "observation",
	"speaker": "Environment",
	"text": str(item["observation"]),
	"screenshot": item.get("screenshot", ""),
	})
	# Format 3: step_type/content
	elif "step_type" in item:
	steps.append({
	"type": item["step_type"],
	"speaker": item.get("speaker", item.get("step_type", "").capitalize()),
	"text": item.get("content", item.get("text", "")),
	"timestamp": item.get("timestamp", ""),
	"screenshot": item.get("screenshot", ""),
	})

	return steps