wu981526092 committed on
Commit
c469fc9
·
1 Parent(s): 1d4dcf3
agentgraph/methods/production/openai_structured_extractor.py CHANGED
@@ -57,15 +57,27 @@ class OpenAIStructuredExtractor:
57
  # System prompt - focus on your role and methodology
58
  system_prompt = """You are an expert knowledge graph analyst specializing in agent system traces.
59
 
60
- Your task is to extract structured knowledge graphs from agent execution traces. You identify entities (Agents, Tasks, Tools, Inputs, Outputs, Humans) and their relationships, providing precise content references when line markers are available.
61
 
62
- CRITICAL PRINCIPLES:
63
- 1. COMPREHENSIVENESS: Include ALL entities that play any role in the system, no matter how minor
64
- 2. CONSISTENCY: Follow the example's level of detail and thoroughness
65
- 3. COMPLETENESS: Every named agent, tool, task, input, and output should be captured
66
- 4. ACCURACY: Match entity types and relationships to the actual trace content
 
 
 
67
 
68
- You always return a complete knowledge graph with meaningful entities, logical relationships, and accurate metadata."""
 
 
 
 
 
 
 
 
 
69
 
70
  # User prompt - specific instructions with few-shot example and data
71
  user_prompt = f"""Analyze this agent system trace and extract a knowledge graph with the following specifications:
@@ -116,6 +128,14 @@ Here's the expected knowledge graph structure for multi-agent collaboration trac
116
  "raw_prompt": "Verify the accuracy of the provided costs for a daily ticket and a season pass for California's Great America in San Jose for the summer of 2024.",
117
  "raw_prompt_ref": [{{"line_start": 8, "line_end": 8}}, {{"line_start": 10, "line_end": 10}}, {{"line_start": 11, "line_end": 12}}]
118
  }},
 
 
 
 
 
 
 
 
119
  {{
120
  "id": "input_001",
121
  "type": "Input",
@@ -189,25 +209,43 @@ Here's the expected knowledge graph structure for multi-agent collaboration trac
189
  }},
190
  {{
191
  "id": "relation_006",
192
- "source": "agent_002",
193
- "target": "task_001",
194
- "type": "INTERVENES",
195
  "importance": "HIGH",
196
- "interaction_prompt": "Agent intervenes in the task process to provide verification and validation",
197
  "interaction_prompt_ref": [{{"line_start": 164, "line_end": 164}}]
198
  }},
199
  {{
200
  "id": "relation_007",
201
- "source": "agent_001",
202
  "target": "agent_004",
203
- "type": "USES",
204
  "importance": "MEDIUM",
205
- "interaction_prompt": "Agent uses Computer Terminal for computational tasks",
206
  "interaction_prompt_ref": [{{"line_start": 50, "line_end": 55}}]
207
  }}
208
  ],
209
- "failures": [],
210
- "optimizations": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  }}
212
 
213
  Now analyze the following trace data:
@@ -253,20 +291,32 @@ IMPORTANT: Only create content references when you see explicit <L#> line marker
253
 
254
  Also provide system_name and system_summary for the overall system.
255
 
256
- EXTRACTION GUIDELINES:
257
- 1. AGENT IDENTIFICATION: Include every named agent, expert, assistant, or role mentioned in the trace
258
- 2. TOOL DISCOVERY: Capture all computational tools, terminals, systems, analyzers, or utilities
259
- 3. TASK MAPPING: Identify all tasks, objectives, goals, verification steps, and subtasks
260
- 4. INTERACTION TRACKING: Include all inputs, outputs, intermediate results, and data flows
261
- 5. STAKEHOLDER INCLUSION: Identify all human users, requesters, and beneficiaries
 
 
 
 
 
 
 
 
 
 
 
262
 
263
- QUALITY STANDARDS:
264
- - Match the example's thoroughness and attention to detail
265
- - Include entities even if they appear briefly or seem minor
266
- - Ensure every significant component of the system is represented
267
- - Create meaningful relationships that reflect actual interactions
 
268
 
269
- Apply these principles to extract a comprehensive knowledge graph from the following trace data.
270
 
271
  TRACE DATA:
272
  {input_data}"""
 
57
  # System prompt - focus on your role and methodology
58
  system_prompt = """You are an expert knowledge graph analyst specializing in agent system traces.
59
 
60
+ Extract comprehensive knowledge graphs capturing all entities and their precise relationships. Focus on workflow accuracy and relationship completeness.
61
 
62
+ CORE PRINCIPLES:
63
+ 1. Capture ALL participants: agents, tools, tasks, inputs, outputs, humans
64
+ 2. Use professional naming (spaces, not underscores)
65
+ 3. Map complete workflows: Input → Agents perform Tasks → Output → Human
66
+ 4. Connect sequential tasks with NEXT relationships
67
+ 5. Show tool dependencies with REQUIRED_BY relationships
68
+ 6. Identify failures: errors, mistakes, broken processes, incorrect outputs
69
+ 7. Suggest optimizations: improvements, efficiency gains, better approaches
70
 
71
+ RELATIONSHIP TYPES (use exactly these):
72
+ - CONSUMED_BY: Input consumed by agent
73
+ - PERFORMS: Agent performs task
74
+ - NEXT: Task A leads to Task B (critical for workflow)
75
+ - PRODUCES: Task produces output
76
+ - DELIVERS_TO: Output delivered to human
77
+ - REQUIRED_BY: Task needs tool to execute (not USES)
78
+ - USES: Agent uses tool for general support
79
+
80
+ Return complete, accurate knowledge graphs with proper workflow sequences."""
81
 
82
  # User prompt - specific instructions with few-shot example and data
83
  user_prompt = f"""Analyze this agent system trace and extract a knowledge graph with the following specifications:
 
128
  "raw_prompt": "Verify the accuracy of the provided costs for a daily ticket and a season pass for California's Great America in San Jose for the summer of 2024.",
129
  "raw_prompt_ref": [{{"line_start": 8, "line_end": 8}}, {{"line_start": 10, "line_end": 10}}, {{"line_start": 11, "line_end": 12}}]
130
  }},
131
+ {{
132
+ "id": "task_002",
133
+ "type": "Task",
134
+ "name": "Calculate Savings Amount",
135
+ "importance": "HIGH",
136
+ "raw_prompt": "Calculate the amount saved by purchasing a season pass instead of daily tickets for 4 visits.",
137
+ "raw_prompt_ref": [{{"line_start": 119, "line_end": 126}}]
138
+ }},
139
  {{
140
  "id": "input_001",
141
  "type": "Input",
 
209
  }},
210
  {{
211
  "id": "relation_006",
212
+ "source": "task_001",
213
+ "target": "task_002",
214
+ "type": "NEXT",
215
  "importance": "HIGH",
216
+ "interaction_prompt": "Verification task leads to arithmetic calculation task",
217
  "interaction_prompt_ref": [{{"line_start": 164, "line_end": 164}}]
218
  }},
219
  {{
220
  "id": "relation_007",
221
+ "source": "task_002",
222
  "target": "agent_004",
223
+ "type": "REQUIRED_BY",
224
  "importance": "MEDIUM",
225
+ "interaction_prompt": "Calculation task requires computer terminal for execution",
226
  "interaction_prompt_ref": [{{"line_start": 50, "line_end": 55}}]
227
  }}
228
  ],
229
+ "failures": [
230
+ {{
231
+ "id": "failure_001",
232
+ "description": "Verification Expert failed to access real-time pricing data, relying on potentially outdated cost estimates",
233
+ "raw_text": "However, since I am currently unable to access external websites, I will use the provided cost",
234
+ "raw_text_ref": [],
235
+ "affected_id": "agent_002",
236
+ "risk_type": "RETRIEVAL_ERROR"
237
+ }}
238
+ ],
239
+ "optimizations": [
240
+ {{
241
+ "id": "opt_001",
242
+ "description": "Implement automated price verification system to reduce manual verification overhead and improve accuracy",
243
+ "raw_text": "Enhanced price verification with real-time data access",
244
+ "raw_text_ref": [],
245
+ "affected_ids": ["agent_002", "task_001"],
246
+ "recommendation_type": "TOOL_ENHANCEMENT"
247
+ }}
248
+ ]
249
  }}
250
 
251
  Now analyze the following trace data:
 
291
 
292
  Also provide system_name and system_summary for the overall system.
293
 
294
+ EXTRACTION FOCUS:
295
+ 1. Identify ALL named participants (agents, tools, tasks, inputs, outputs, human)
296
+ 2. Create sequential task chains: Task1 NEXT Task2 NEXT Task3
297
+ 3. Show tool dependencies: Task REQUIRED_BY Tool (when task needs tool to execute)
298
+ 4. Use clean professional naming (no underscores)
299
+ 5. Complete workflow: Input CONSUMED_BY Agent PERFORMS Task PRODUCES Output DELIVERS_TO Human
300
+ 6. DETECT FAILURES: Look for errors, exceptions, incorrect results, failed executions, incomplete tasks, missing validations
301
+ 7. SUGGEST OPTIMIZATIONS: Identify inefficiencies, redundancies, improvement opportunities, missing tools, workflow enhancements
302
+
303
+ CRITICAL: Use NEXT for task sequences, REQUIRED_BY for tool dependencies.
304
+
305
+ FAILURE DETECTION (look for):
306
+ - Execution errors, exceptions, failed operations
307
+ - Incorrect outputs, wrong calculations, invalid results
308
+ - Missing validations, incomplete processes
309
+ - Tool limitations, access restrictions
310
+ - Agent coordination problems
311
 
312
+ OPTIMIZATION OPPORTUNITIES (suggest):
313
+ - Tool enhancements, automation possibilities
314
+ - Workflow simplifications, redundancy removal
315
+ - Agent merging or specialization
316
+ - Performance improvements, efficiency gains
317
+ - Missing capabilities or better approaches
318
 
319
+ IMPORTANT: Always provide at least 1-2 failures and optimizations based on trace analysis.
320
 
321
  TRACE DATA:
322
  {input_data}"""