"""
Agent Trajectory Generator & Decision Point Labeling
Generates synthetic agent trajectories with labeled decision points.
Also loads real trajectories from AgentBench / ToolBench.
"""
import json
import random
from pathlib import Path
from dataclasses import dataclass, field
from typing import List, Optional
@dataclass
class TrajectoryStep:
    """A single step inside an agent trajectory.

    Fields:
        role: Kind of step; expected to be one of "thought", "action",
            "observation", "error" or "plan" (not validated here).
        content: Text of the step.
        is_decision_point: Whether the step is labeled as a decision point.
        tokens: Token strings for the step; each instance gets its own
            fresh empty list when none is supplied.
    """

    role: str
    content: str
    is_decision_point: bool = False
    tokens: List[str] = field(default_factory=list)
@dataclass
class AgentTrajectory:
    """A task description plus the ordered steps an agent took on it.

    Fields:
        task: Description of the task the trajectory belongs to.
        steps: The trajectory's steps, in order.
        total_tokens: Token count over all steps (supplied by the caller).
        decision_point_tokens: Token count over the steps flagged as
            decision points (supplied by the caller).
    """

    task: str
    # Quoted forward reference: keeps this class independent of where
    # TrajectoryStep is defined relative to it.
    steps: List["TrajectoryStep"]
    total_tokens: int = 0
    decision_point_tokens: int = 0

    @property
    def decision_ratio(self) -> float:
        """Return the fraction of tokens that belong to decision points.

        Returns 0.0 when ``total_tokens`` is zero, so the result is always a
        float and the division below can never fail.
        """
        if self.total_tokens == 0:
            return 0.0
        return self.decision_point_tokens / self.total_tokens
# Marker phrases grouped by decision-point category. Key order and the order
# of phrases inside each list are significant for seeded random sampling.
DECISION_PATTERNS = {
    "tool_call": [
        "I need to call",
        "Let me use",
        "Action:",
        "Tool:",
        "API call:",
        "function_call",
        "execute(",
        "search(",
        "query(",
    ],
    "plan_revision": [
        "Let me reconsider",
        "Actually,",
        "Wait,",
        "On second thought",
        "I should change",
        "New plan:",
        "Revised approach:",
        "Instead,",
    ],
    "error_recovery": [
        "Error:",
        "Failed:",
        "Exception:",
        "Traceback",
        "retry",
        "That didn't work",
        "Let me try another",
        "fallback",
    ],
    "state_update": [
        "Result:",
        "Output:",
        "The answer is",
        "Found:",
        "Updated:",
        "Status:",
        "Observation:",
    ],
}
# Filler openers for steps that are NOT decision points. Every entry is passed
# through str.format(i=<step index>), so "{i}" is a placeholder.
ROUTINE_PATTERNS = [
    "Let me think about this...",
    "Looking at the data...",
    "Based on the context...",
    "The document mentions...",
    "According to the passage...",
    "Step {i}: ",
    "Processing...",
    "Reading the input...",
]
def generate_synthetic_trajectory(
    num_steps: int = 20,
    decision_ratio: float = 0.15,
    task: str = "multi-hop QA",
    rng: Optional[random.Random] = None,
) -> AgentTrajectory:
    """Generate a synthetic agent trajectory with labeled decision points.

    Args:
        num_steps: Number of steps to generate.
        decision_ratio: Per-step probability that the step is a decision point.
        task: Task description embedded in the generated step text.
        rng: Optional ``random.Random`` instance to draw from, for
            reproducible output. When omitted, the module-level ``random``
            functions are used, as before.

    Returns:
        An ``AgentTrajectory`` whose ``total_tokens`` and
        ``decision_point_tokens`` are whitespace-token counts over all steps
        and over the decision-point steps respectively.
    """
    # The random module exposes the same random()/choice() calls as a
    # Random instance, so either can serve as the source of randomness.
    source = random if rng is None else rng

    # Decision-point category -> (step role, content template).
    decision_templates = {
        "tool_call": (
            "action",
            "{pattern} search_api('query about {task} step {i}')",
        ),
        "plan_revision": (
            "thought",
            "{pattern} the approach for step {i} needs adjustment.",
        ),
        "error_recovery": (
            "error",
            "{pattern} step {i} encountered an issue. Trying alternative.",
        ),
    }
    # Any other category (currently only "state_update") is an observation.
    fallback_template = (
        "observation",
        "{pattern} step {i} yielded new information for {task}.",
    )
    dp_types = list(DECISION_PATTERNS)

    steps = []
    total_toks = 0
    dp_toks = 0
    for i in range(num_steps):
        is_dp = source.random() < decision_ratio
        if is_dp:
            dp_type = source.choice(dp_types)
            pattern = source.choice(DECISION_PATTERNS[dp_type])
            role, template = decision_templates.get(dp_type, fallback_template)
            content = template.format(pattern=pattern, task=task, i=i)
        else:
            pattern = source.choice(ROUTINE_PATTERNS).format(i=i)
            content = f"{pattern} analyzing information related to {task}."
            role = "thought"

        tokens = content.split()
        total_toks += len(tokens)
        if is_dp:
            dp_toks += len(tokens)
        steps.append(TrajectoryStep(
            role=role,
            content=content,
            is_decision_point=is_dp,
            tokens=tokens,
        ))

    return AgentTrajectory(
        task=task,
        steps=steps,
        total_tokens=total_toks,
        decision_point_tokens=dp_toks,
    )
def generate_dataset(num_trajectories: int = 1000, save_path=None) -> List["AgentTrajectory"]:
    """Generate a dataset of labeled agent trajectories.

    Each trajectory gets a randomly chosen task, 10-40 steps and a
    decision-point probability drawn uniformly from [0.08, 0.25]. Summary
    statistics are printed to stdout.

    Args:
        num_trajectories: Number of trajectories to generate. Zero yields an
            empty dataset; the ratio statistics are then skipped.
        save_path: Optional output file path (str or Path). When truthy, the
            dataset is written there as JSON and missing parent directories
            are created.

    Returns:
        The list of generated trajectories.
    """
    tasks = [
        "multi-hop question answering",
        "code debugging with tool use",
        "web navigation and form filling",
        "API orchestration pipeline",
        "database query planning",
        "research paper analysis",
    ]

    trajectories = []
    for _ in range(num_trajectories):
        task = random.choice(tasks)
        num_steps = random.randint(10, 40)
        ratio = random.uniform(0.08, 0.25)
        trajectories.append(generate_synthetic_trajectory(num_steps, ratio, task))

    # Statistics
    print(f"Generated {len(trajectories)} trajectories")
    if trajectories:
        # Averages and min/max are undefined for an empty dataset.
        ratios = [t.decision_ratio for t in trajectories]
        avg_ratio = sum(ratios) / len(ratios)
        avg_steps = sum(len(t.steps) for t in trajectories) / len(trajectories)
        print(f"Avg decision ratio: {avg_ratio:.2%}")
        print(f"Min/Max ratio: {min(ratios):.2%} / {max(ratios):.2%}")
        print(f"Avg steps: {avg_steps:.1f}")

    if save_path:
        save_path = Path(save_path)
        save_path.parent.mkdir(parents=True, exist_ok=True)
        # Per-step token lists are not serialized, only the counts.
        data = [
            {
                "task": t.task,
                "total_tokens": t.total_tokens,
                "decision_point_tokens": t.decision_point_tokens,
                "decision_ratio": t.decision_ratio,
                "steps": [
                    {
                        "role": s.role,
                        "content": s.content,
                        "is_decision_point": s.is_decision_point,
                    }
                    for s in t.steps
                ],
            }
            for t in trajectories
        ]
        with open(save_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
        print(f"Saved to {save_path}")

    return trajectories
def label_decision_points(text: str) -> List[bool]:
    """Label each whitespace-separated token of ``text`` as decision point or not.

    A token is labeled True when any phrase from ``DECISION_PATTERNS`` occurs,
    case-insensitively and as a substring, in the space-joined window made of
    the three preceding tokens, the token itself and the two following tokens.

    Args:
        text: Input text; it is tokenized with ``str.split()``.

    Returns:
        One boolean per token, in token order (empty list for blank input).
    """
    tokens = text.split()
    if not tokens:
        return []

    # Lowercase every pattern once instead of once per token and pattern.
    needles = [
        phrase.lower()
        for phrases in DECISION_PATTERNS.values()
        for phrase in phrases
    ]

    labels = []
    for i in range(len(tokens)):
        # NOTE(review): the window is asymmetric (3 tokens before, 2 after);
        # kept as-is, confirm whether that is intended.
        context = " ".join(tokens[max(0, i - 3):i + 3]).lower()
        labels.append(any(needle in context for needle in needles))
    return labels
if __name__ == "__main__":
    # Script entry point: build 2000 synthetic trajectories and write them
    # out as JSON under data/.
    trajectories = generate_dataset(
        save_path="data/agent_trajectories.json", num_trajectories=2000
    )
|