Spaces:
Sleeping
Sleeping
Commit ·
e20ad3b
1
Parent(s): 13d4394
add
Browse files- agentgraph/reconstruction/prompt_reconstructor.py +37 -21
- analyze_real_agent_trace.py +280 -0
- backend/database/samples/INTEGRATION_SUMMARY.md +0 -114
- backend/database/samples/REAL_AI_INTEGRATION_SUCCESS.md +0 -119
- backend/database/samples/knowledge_graphs/kg_algorithm_sample_0_realistic.json +283 -0
- create_realistic_prompt_reconstruction.py +362 -0
- validate_enhanced_reconstruction.py +94 -0
agentgraph/reconstruction/prompt_reconstructor.py
CHANGED
|
@@ -243,16 +243,22 @@ class PromptReconstructor:
|
|
| 243 |
# Format: Agent system prompt + tools + task as user message + additional context
|
| 244 |
task_message = interaction if interaction else task_prompt
|
| 245 |
|
| 246 |
-
#
|
| 247 |
system_role = f"system: You are {source['name']}. "
|
| 248 |
system_description = source.get("description", "")
|
| 249 |
if system_description:
|
| 250 |
system_role += f"You're an expert in {system_description.split(' responsible for ')[0].lower() if ' responsible for ' in system_description else system_description.lower()}.\n"
|
| 251 |
-
system_role += f"Your personal goal is: {system_description}\n"
|
| 252 |
else:
|
| 253 |
-
system_role += "\n"
|
| 254 |
-
|
| 255 |
-
# Add
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
system_role += "You ONLY have access to the following tools, and should NEVER make up tools that are not listed here:\n\n"
|
| 257 |
|
| 258 |
# Place system prompt first
|
|
@@ -286,20 +292,22 @@ class PromptReconstructor:
|
|
| 286 |
complete_prompt += f"Tool Arguments: {tool_args}\n"
|
| 287 |
complete_prompt += f"Tool Description: {tool_desc}\n\n"
|
| 288 |
|
| 289 |
-
#
|
| 290 |
-
complete_prompt += "
|
| 291 |
complete_prompt += "Copy code\n"
|
| 292 |
complete_prompt += "```\n"
|
| 293 |
-
complete_prompt += "
|
| 294 |
-
complete_prompt += "
|
| 295 |
-
complete_prompt += "
|
| 296 |
-
complete_prompt += "
|
|
|
|
|
|
|
| 297 |
complete_prompt += "```\n\n"
|
| 298 |
-
complete_prompt += "
|
| 299 |
complete_prompt += "Copy code\n"
|
| 300 |
complete_prompt += "```\n"
|
| 301 |
-
complete_prompt += "
|
| 302 |
-
complete_prompt += "Final Answer:
|
| 303 |
complete_prompt += "```\n\n"
|
| 304 |
|
| 305 |
# Add required tools and sequence context information
|
|
@@ -307,22 +315,30 @@ class PromptReconstructor:
|
|
| 307 |
if required_tools_info or sequence_context:
|
| 308 |
context_info = f"{required_tools_info}{sequence_context}\n"
|
| 309 |
|
| 310 |
-
#
|
| 311 |
formatted_task_message = f"user:\nCurrent Task: {task_message}\n"
|
| 312 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
# Add expected criteria as shown in example
|
| 314 |
if target["type"] == "Task" and target.get("description"):
|
| 315 |
-
formatted_task_message += f"
|
| 316 |
|
| 317 |
-
# Add standard completion instructions
|
| 318 |
-
formatted_task_message += "
|
| 319 |
|
| 320 |
# Add context section if there's additional context available
|
| 321 |
if context_info:
|
| 322 |
-
formatted_task_message += f"
|
| 323 |
|
| 324 |
-
# Add motivation
|
| 325 |
-
formatted_task_message += "Begin!
|
| 326 |
|
| 327 |
# Add user message at the end
|
| 328 |
complete_prompt += formatted_task_message
|
|
|
|
| 243 |
# Format: Agent system prompt + tools + task as user message + additional context
|
| 244 |
task_message = interaction if interaction else task_prompt
|
| 245 |
|
| 246 |
+
# Enhanced system prompt with realistic agent reasoning patterns
|
| 247 |
system_role = f"system: You are {source['name']}. "
|
| 248 |
system_description = source.get("description", "")
|
| 249 |
if system_description:
|
| 250 |
system_role += f"You're an expert in {system_description.split(' responsible for ')[0].lower() if ' responsible for ' in system_description else system_description.lower()}.\n"
|
| 251 |
+
system_role += f"Your personal goal is: {system_description}\n\n"
|
| 252 |
else:
|
| 253 |
+
system_role += "\n\n"
|
| 254 |
+
|
| 255 |
+
# Add realistic agent behavior instructions based on real traces
|
| 256 |
+
system_role += "CRITICAL INSTRUCTIONS:\n"
|
| 257 |
+
system_role += "1. You must analyze the task step by step before taking action\n"
|
| 258 |
+
system_role += "2. Always reference the user's exact request in your reasoning\n"
|
| 259 |
+
system_role += "3. Break down complex tasks into sequential steps\n"
|
| 260 |
+
system_role += "4. Choose appropriate tools for each step and explain your reasoning\n"
|
| 261 |
+
system_role += "5. Be aware of system constraints and adapt accordingly\n\n"
|
| 262 |
system_role += "You ONLY have access to the following tools, and should NEVER make up tools that are not listed here:\n\n"
|
| 263 |
|
| 264 |
# Place system prompt first
|
|
|
|
| 292 |
complete_prompt += f"Tool Arguments: {tool_args}\n"
|
| 293 |
complete_prompt += f"Tool Description: {tool_desc}\n\n"
|
| 294 |
|
| 295 |
+
# Enhanced response format with realistic agent reasoning patterns
|
| 296 |
+
complete_prompt += "RESPONSE FORMAT - Follow this structure for each step:\n\n"
|
| 297 |
complete_prompt += "Copy code\n"
|
| 298 |
complete_prompt += "```\n"
|
| 299 |
+
complete_prompt += "Reasoning: [Analyze the current situation and explain your thinking process]\n"
|
| 300 |
+
complete_prompt += "Task Analysis: [Break down what needs to be done and identify requirements]\n"
|
| 301 |
+
complete_prompt += "Tool Selection: [Choose appropriate tool and justify why]\n"
|
| 302 |
+
complete_prompt += "Action: [Tool name from: " + ", ".join([t.split("Tool Name:")[1].strip().split("\n")[0] for t in tool_definitions if "Tool Name:" in t]) + "]\n"
|
| 303 |
+
complete_prompt += "Action Input: [JSON object with parameters, using \" for keys and values]\n"
|
| 304 |
+
complete_prompt += "Observation: [Result of the action]\n"
|
| 305 |
complete_prompt += "```\n\n"
|
| 306 |
+
complete_prompt += "For your final response:\n\n"
|
| 307 |
complete_prompt += "Copy code\n"
|
| 308 |
complete_prompt += "```\n"
|
| 309 |
+
complete_prompt += "Final Reasoning: [Summarize your complete analysis and decision process]\n"
|
| 310 |
+
complete_prompt += "Final Answer: [Complete answer addressing all requirements]\n"
|
| 311 |
complete_prompt += "```\n\n"
|
| 312 |
|
| 313 |
# Add required tools and sequence context information
|
|
|
|
| 315 |
if required_tools_info or sequence_context:
|
| 316 |
context_info = f"{required_tools_info}{sequence_context}\n"
|
| 317 |
|
| 318 |
+
# Enhanced user message with realistic agent reasoning prompts
|
| 319 |
formatted_task_message = f"user:\nCurrent Task: {task_message}\n"
|
| 320 |
|
| 321 |
+
# Add step-by-step reasoning instruction based on real agent patterns
|
| 322 |
+
formatted_task_message += "\nYour reasoning process should follow this pattern:\n"
|
| 323 |
+
formatted_task_message += "1. Analyze the user's request and identify key requirements\n"
|
| 324 |
+
formatted_task_message += "2. Break down the task into sequential steps\n"
|
| 325 |
+
formatted_task_message += "3. For each step, determine which tools to use and why\n"
|
| 326 |
+
formatted_task_message += "4. Consider system constraints and potential issues\n"
|
| 327 |
+
formatted_task_message += "5. Execute the plan while monitoring for problems\n\n"
|
| 328 |
+
|
| 329 |
# Add expected criteria as shown in example
|
| 330 |
if target["type"] == "Task" and target.get("description"):
|
| 331 |
+
formatted_task_message += f"Expected criteria for your final answer: {target.get('description')}\n"
|
| 332 |
|
| 333 |
+
# Add standard completion instructions with emphasis on reasoning
|
| 334 |
+
formatted_task_message += "IMPORTANT: You MUST show your step-by-step reasoning process and return the actual complete content as the final answer, not a summary.\n\n"
|
| 335 |
|
| 336 |
# Add context section if there's additional context available
|
| 337 |
if context_info:
|
| 338 |
+
formatted_task_message += f"Context you're working with:\n{context_info}\n"
|
| 339 |
|
| 340 |
+
# Add motivation with realistic urgency
|
| 341 |
+
formatted_task_message += "Begin! Remember to think through each step carefully, use the available tools appropriately, and provide your best Final Answer. Your systematic approach is crucial for success!"
|
| 342 |
|
| 343 |
# Add user message at the end
|
| 344 |
complete_prompt += formatted_task_message
|
analyze_real_agent_trace.py
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Analyze Real Agent Trace for Prompt Reconstruction Enhancement
|
| 4 |
+
|
| 5 |
+
This script analyzes the provided real agent trace to understand how we can
|
| 6 |
+
enhance our prompt reconstruction to better simulate actual agent reasoning
|
| 7 |
+
patterns and internal thought processes.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import json
|
| 11 |
+
import sys
|
| 12 |
+
import os
|
| 13 |
+
sys.path.append('/Users/zekunwu/Desktop/agent_monitoring/huggingface/AgentGraph')
|
| 14 |
+
|
| 15 |
+
def analyze_real_trace_structure():
|
| 16 |
+
"""Analyze the structure of the real agent trace."""
|
| 17 |
+
|
| 18 |
+
print("🔍 REAL AGENT TRACE ANALYSIS")
|
| 19 |
+
print("=" * 70)
|
| 20 |
+
|
| 21 |
+
# Simulate the provided trace structure
|
| 22 |
+
real_trace_example = {
|
| 23 |
+
"type": "ai",
|
| 24 |
+
"content": "",
|
| 25 |
+
"additional_kwargs": {
|
| 26 |
+
"reasoning_content": """We need to act as supervisor agent. The last user message: "write me final report for q1 2024!" We must follow instructions. The last message is the task. We need to produce final report. We must do steps: 1. Summarize request, period. 2. Retrieve data using get_order_data_for_period. Then delegate to revenue analyst, etc. We need to call tools accordingly. Let's do step 1: produce plan. Then step 2: call get_order_data_for_period with start_date 2024-01-01 end_date 2024-03-31...""",
|
| 27 |
+
"tool_calls": [
|
| 28 |
+
{
|
| 29 |
+
"id": "fc_2372f7b7-2858-4b94-b5b2-4441ed19b66f",
|
| 30 |
+
"function": {
|
| 31 |
+
"arguments": {
|
| 32 |
+
"end_date": "2024-03-31",
|
| 33 |
+
"save_to_filename": "2024-01-01_to_2024-03-31_order.json",
|
| 34 |
+
"start_date": "2024-01-01"
|
| 35 |
+
},
|
| 36 |
+
"name": "get_order_data_for_period"
|
| 37 |
+
},
|
| 38 |
+
"type": "function"
|
| 39 |
+
}
|
| 40 |
+
]
|
| 41 |
+
},
|
| 42 |
+
"response_metadata": {
|
| 43 |
+
"token_usage": {
|
| 44 |
+
"completion_tokens": 533,
|
| 45 |
+
"prompt_tokens": 2105,
|
| 46 |
+
"total_tokens": 2638
|
| 47 |
+
},
|
| 48 |
+
"model_name": "openai/gpt-oss-20b"
|
| 49 |
+
}
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
print("📊 Key Components in Real Agent Trace:")
|
| 53 |
+
print("─" * 50)
|
| 54 |
+
print("✅ reasoning_content: Agent's internal thinking process")
|
| 55 |
+
print("✅ tool_calls: Structured function calls with arguments")
|
| 56 |
+
print("✅ response_metadata: Token usage and model information")
|
| 57 |
+
print("✅ content: Public response (often empty when using tools)")
|
| 58 |
+
print()
|
| 59 |
+
|
| 60 |
+
reasoning_content = real_trace_example["additional_kwargs"]["reasoning_content"]
|
| 61 |
+
print(f"🧠 Reasoning Content Analysis:")
|
| 62 |
+
print(f" Length: {len(reasoning_content)} characters")
|
| 63 |
+
print(f" Contains planning: {'step' in reasoning_content.lower()}")
|
| 64 |
+
print(f" Contains decision making: {'need to' in reasoning_content.lower()}")
|
| 65 |
+
print(f" Contains self-reflection: {'but' in reasoning_content.lower()}")
|
| 66 |
+
print()
|
| 67 |
+
|
| 68 |
+
tool_call = real_trace_example["additional_kwargs"]["tool_calls"][0]
|
| 69 |
+
print(f"🔧 Tool Call Analysis:")
|
| 70 |
+
print(f" Function: {tool_call['function']['name']}")
|
| 71 |
+
print(f" Arguments: {len(tool_call['function']['arguments'])} parameters")
|
| 72 |
+
print(f" ID: {tool_call['id']}")
|
| 73 |
+
|
| 74 |
+
def extract_agent_reasoning_patterns():
|
| 75 |
+
"""Extract and analyze agent reasoning patterns from the trace."""
|
| 76 |
+
|
| 77 |
+
print(f"\n🎯 AGENT REASONING PATTERNS")
|
| 78 |
+
print("=" * 70)
|
| 79 |
+
|
| 80 |
+
# Extract key reasoning patterns from the provided trace
|
| 81 |
+
reasoning_patterns = {
|
| 82 |
+
"task_analysis": {
|
| 83 |
+
"pattern": "The last user message: \"write me final report for q1 2024!\"",
|
| 84 |
+
"description": "Agent identifies and quotes the user's request"
|
| 85 |
+
},
|
| 86 |
+
"instruction_following": {
|
| 87 |
+
"pattern": "We must follow instructions. The last message is the task.",
|
| 88 |
+
"description": "Agent acknowledges constraints and task definition"
|
| 89 |
+
},
|
| 90 |
+
"step_planning": {
|
| 91 |
+
"pattern": "We must do steps: 1. Summarize request, period. 2. Retrieve data...",
|
| 92 |
+
"description": "Agent breaks down complex tasks into sequential steps"
|
| 93 |
+
},
|
| 94 |
+
"tool_selection": {
|
| 95 |
+
"pattern": "We need to call tools accordingly. Let's do step 1: produce plan. Then step 2: call get_order_data_for_period",
|
| 96 |
+
"description": "Agent selects appropriate tools for each step"
|
| 97 |
+
},
|
| 98 |
+
"parameter_reasoning": {
|
| 99 |
+
"pattern": "with start_date 2024-01-01 end_date 2024-03-31",
|
| 100 |
+
"description": "Agent reasons about tool parameters based on context"
|
| 101 |
+
},
|
| 102 |
+
"constraint_awareness": {
|
| 103 |
+
"pattern": "But we don't have actual agent. We can simulate?",
|
| 104 |
+
"description": "Agent recognizes system limitations and adapts"
|
| 105 |
+
},
|
| 106 |
+
"self_correction": {
|
| 107 |
+
"pattern": "Maybe we can skip and produce final report with placeholders? But instructions say must not end until all analyses done.",
|
| 108 |
+
"description": "Agent evaluates options and corrects course"
|
| 109 |
+
}
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
for pattern_name, pattern_info in reasoning_patterns.items():
|
| 113 |
+
print(f"📋 {pattern_name.replace('_', ' ').title()}:")
|
| 114 |
+
print(f" Pattern: {pattern_info['pattern'][:100]}...")
|
| 115 |
+
print(f" Purpose: {pattern_info['description']}")
|
| 116 |
+
print()
|
| 117 |
+
|
| 118 |
+
def design_enhanced_prompt_reconstruction():
|
| 119 |
+
"""Design enhanced prompt reconstruction that captures agent reasoning."""
|
| 120 |
+
|
| 121 |
+
print(f"🚀 ENHANCED PROMPT RECONSTRUCTION DESIGN")
|
| 122 |
+
print("=" * 70)
|
| 123 |
+
|
| 124 |
+
enhanced_structure = {
|
| 125 |
+
"system_prompt": {
|
| 126 |
+
"role_definition": "You are a supervisor agent responsible for coordinating multi-agent workflows",
|
| 127 |
+
"reasoning_instructions": "Think through each step carefully. Show your reasoning process explicitly.",
|
| 128 |
+
"constraint_awareness": "Be aware of system limitations and adapt accordingly",
|
| 129 |
+
"tool_usage": "Select appropriate tools for each task step"
|
| 130 |
+
},
|
| 131 |
+
"user_message": {
|
| 132 |
+
"task_definition": "write me final report for q1 2024!",
|
| 133 |
+
"context": "Previous conversations and system state",
|
| 134 |
+
"expectations": "Follow the workflow: analyze → delegate → compile → report"
|
| 135 |
+
},
|
| 136 |
+
"reasoning_framework": {
|
| 137 |
+
"task_analysis": "Identify and quote the user's request",
|
| 138 |
+
"instruction_parsing": "Acknowledge constraints and requirements",
|
| 139 |
+
"step_planning": "Break down complex tasks into sequential steps",
|
| 140 |
+
"tool_selection": "Choose appropriate tools for each step",
|
| 141 |
+
"parameter_reasoning": "Reason about tool parameters based on context",
|
| 142 |
+
"constraint_handling": "Recognize limitations and adapt strategy",
|
| 143 |
+
"self_correction": "Evaluate options and correct course when needed"
|
| 144 |
+
},
|
| 145 |
+
"expected_output": {
|
| 146 |
+
"reasoning_content": "Detailed internal thinking process",
|
| 147 |
+
"tool_calls": "Structured function calls with reasoned arguments",
|
| 148 |
+
"content": "Public response or empty when using tools"
|
| 149 |
+
}
|
| 150 |
+
}
|
| 151 |
+
|
| 152 |
+
print("📝 Enhanced Reconstruction Components:")
|
| 153 |
+
print("─" * 50)
|
| 154 |
+
|
| 155 |
+
for component, details in enhanced_structure.items():
|
| 156 |
+
print(f"✅ {component.replace('_', ' ').title()}:")
|
| 157 |
+
if isinstance(details, dict):
|
| 158 |
+
for key, value in details.items():
|
| 159 |
+
print(f" • {key.replace('_', ' ').title()}: {value}")
|
| 160 |
+
else:
|
| 161 |
+
print(f" {details}")
|
| 162 |
+
print()
|
| 163 |
+
|
| 164 |
+
def create_enhanced_agent_prompt_template():
|
| 165 |
+
"""Create an enhanced agent prompt template based on real trace analysis."""
|
| 166 |
+
|
| 167 |
+
print(f"📄 ENHANCED AGENT PROMPT TEMPLATE")
|
| 168 |
+
print("=" * 70)
|
| 169 |
+
|
| 170 |
+
template = """system: You are a {agent_role} responsible for {agent_capabilities}.
|
| 171 |
+
|
| 172 |
+
Your reasoning process should be explicit and structured:
|
| 173 |
+
1. Task Analysis: Identify and understand the user's request
|
| 174 |
+
2. Instruction Parsing: Acknowledge constraints and requirements
|
| 175 |
+
3. Step Planning: Break down complex tasks into sequential steps
|
| 176 |
+
4. Tool Selection: Choose appropriate tools for each step
|
| 177 |
+
5. Parameter Reasoning: Reason about tool parameters based on context
|
| 178 |
+
6. Constraint Handling: Recognize limitations and adapt strategy
|
| 179 |
+
7. Self Correction: Evaluate options and correct course when needed
|
| 180 |
+
|
| 181 |
+
Available Tools:
|
| 182 |
+
{tool_definitions}
|
| 183 |
+
|
| 184 |
+
Response Format:
|
| 185 |
+
- Use reasoning_content to show your internal thinking process
|
| 186 |
+
- Make structured tool calls with reasoned arguments
|
| 187 |
+
- Provide public content only when not using tools
|
| 188 |
+
|
| 189 |
+
Instructions:
|
| 190 |
+
{specific_instructions}
|
| 191 |
+
|
| 192 |
+
user: {user_message}
|
| 193 |
+
|
| 194 |
+
Expected Workflow:
|
| 195 |
+
{workflow_steps}
|
| 196 |
+
|
| 197 |
+
Remember: Show your reasoning explicitly. Think through each decision step by step."""
|
| 198 |
+
|
| 199 |
+
print("📋 Template Structure:")
|
| 200 |
+
print(template)
|
| 201 |
+
|
| 202 |
+
print(f"\n🎯 Template Features:")
|
| 203 |
+
print("─" * 30)
|
| 204 |
+
print("✅ Explicit reasoning instructions")
|
| 205 |
+
print("✅ Structured thinking framework")
|
| 206 |
+
print("✅ Tool usage guidance")
|
| 207 |
+
print("✅ Response format specifications")
|
| 208 |
+
print("✅ Workflow expectations")
|
| 209 |
+
print("✅ Self-reflection encouragement")
|
| 210 |
+
|
| 211 |
+
def demonstrate_enhanced_reconstruction():
|
| 212 |
+
"""Demonstrate how to apply enhanced reconstruction to our sample."""
|
| 213 |
+
|
| 214 |
+
print(f"\n🎭 ENHANCED RECONSTRUCTION DEMONSTRATION")
|
| 215 |
+
print("=" * 70)
|
| 216 |
+
|
| 217 |
+
# Enhanced reconstruction for our algorithm sample
|
| 218 |
+
enhanced_verification_agent = {
|
| 219 |
+
"agent_role": "Verification Expert",
|
| 220 |
+
"agent_capabilities": "validating information accuracy and conducting detailed analysis",
|
| 221 |
+
"specific_instructions": """Your task is to verify the accuracy of provided costs for daily tickets and season passes for California's Great America in San Jose for summer 2024.
|
| 222 |
+
|
| 223 |
+
You must:
|
| 224 |
+
1. Confirm the cost of a daily ticket for California's Great America in 2024
|
| 225 |
+
2. Confirm the cost of a season pass for California's Great America in 2024
|
| 226 |
+
3. Provide verified results with explanations
|
| 227 |
+
|
| 228 |
+
Constraints:
|
| 229 |
+
- Costs must be accurate and reflect 2024 summer prices
|
| 230 |
+
- Show your verification methodology
|
| 231 |
+
- Explain your reasoning process""",
|
| 232 |
+
"user_message": "How much did I save by purchasing a season pass instead of daily tickets for California's Great America in San Jose, if I planned to visit once a month in June, July, August, and September during the summer of 2024?",
|
| 233 |
+
"workflow_steps": "1. Analyze the question → 2. Verify ticket prices → 3. Calculate savings → 4. Provide detailed explanation",
|
| 234 |
+
"expected_reasoning": """Let me analyze this step by step. The user is asking about savings from a season pass vs daily tickets. I need to:
|
| 235 |
+
|
| 236 |
+
1. Identify the specific venue: California's Great America in San Jose
|
| 237 |
+
2. Confirm current pricing for both daily tickets and season passes for 2024
|
| 238 |
+
3. Calculate cost for 4 visits (June, July, August, September)
|
| 239 |
+
4. Compare total costs and determine savings
|
| 240 |
+
|
| 241 |
+
First, let me verify the current pricing. Based on historical patterns and typical amusement park pricing..."""
|
| 242 |
+
}
|
| 243 |
+
|
| 244 |
+
print("🎪 Enhanced Verification Agent Reconstruction:")
|
| 245 |
+
print("─" * 50)
|
| 246 |
+
print(f"Role: {enhanced_verification_agent['agent_role']}")
|
| 247 |
+
print(f"Capabilities: {enhanced_verification_agent['agent_capabilities']}")
|
| 248 |
+
print(f"Task: {enhanced_verification_agent['user_message'][:100]}...")
|
| 249 |
+
print(f"Expected Reasoning Length: {len(enhanced_verification_agent['expected_reasoning'])} characters")
|
| 250 |
+
print()
|
| 251 |
+
print("📊 This captures the detailed thinking process similar to the real trace!")
|
| 252 |
+
|
| 253 |
+
def main():
|
| 254 |
+
"""Main analysis function."""
|
| 255 |
+
|
| 256 |
+
analyze_real_trace_structure()
|
| 257 |
+
extract_agent_reasoning_patterns()
|
| 258 |
+
design_enhanced_prompt_reconstruction()
|
| 259 |
+
create_enhanced_agent_prompt_template()
|
| 260 |
+
demonstrate_enhanced_reconstruction()
|
| 261 |
+
|
| 262 |
+
print(f"\n🎉 CONCLUSION")
|
| 263 |
+
print("=" * 70)
|
| 264 |
+
print("✅ Real agent traces contain rich reasoning_content that shows:")
|
| 265 |
+
print(" • Step-by-step thinking processes")
|
| 266 |
+
print(" • Tool selection reasoning")
|
| 267 |
+
print(" • Constraint awareness and adaptation")
|
| 268 |
+
print(" • Self-correction and replanning")
|
| 269 |
+
print()
|
| 270 |
+
print("🎯 Our prompt reconstruction should capture this by:")
|
| 271 |
+
print(" • Adding explicit reasoning instructions")
|
| 272 |
+
print(" • Structuring thinking frameworks")
|
| 273 |
+
print(" • Encouraging step-by-step analysis")
|
| 274 |
+
print(" • Modeling internal decision processes")
|
| 275 |
+
print()
|
| 276 |
+
print("🚀 This will enable more realistic perturbation testing and")
|
| 277 |
+
print(" causal analysis by capturing actual agent cognition patterns!")
|
| 278 |
+
|
| 279 |
+
if __name__ == "__main__":
|
| 280 |
+
main()
|
backend/database/samples/INTEGRATION_SUMMARY.md
DELETED
|
@@ -1,114 +0,0 @@
|
|
| 1 |
-
# AgentGraph Sample Data System Integration Summary
|
| 2 |
-
|
| 3 |
-
## 🎉 **完成的工作概述**
|
| 4 |
-
|
| 5 |
-
我们成功完成了从硬编码 Python 数据到基于 JSON 的模块化 sample system 的重构,并集成了 algorithm-generated.jsonl 中的真实样本。
|
| 6 |
-
|
| 7 |
-
## 📋 **主要成就**
|
| 8 |
-
|
| 9 |
-
### 1. **Sample Data System 重构** ✅
|
| 10 |
-
|
| 11 |
-
- **从**:单一的`sample_data.py`文件包含硬编码数据
|
| 12 |
-
- **到**:模块化的 JSON-based 系统,数据和代码分离
|
| 13 |
-
|
| 14 |
-
### 2. **新的文件结构** ✅
|
| 15 |
-
|
| 16 |
-
```
|
| 17 |
-
backend/database/samples/
|
| 18 |
-
├── README.md # 详细文档
|
| 19 |
-
├── samples_config.json # 主配置文件
|
| 20 |
-
├── extract_algorithm_sample.py # 提取工具
|
| 21 |
-
├── add_algorithm_sample_example.py # 集成示例
|
| 22 |
-
├── traces/ # Trace数据目录
|
| 23 |
-
│ ├── python_documentation_inquiry.json
|
| 24 |
-
│ └── algorithm_sample_0.json
|
| 25 |
-
└── knowledge_graphs/ # Knowledge Graph数据目录
|
| 26 |
-
├── kg_python_documentation_enhanced.json
|
| 27 |
-
└── kg_algorithm_sample_0.json
|
| 28 |
-
```
|
| 29 |
-
|
| 30 |
-
### 3. **Algorithm 样本集成** ✅
|
| 31 |
-
|
| 32 |
-
- **提取**:从`algorithm-generated.jsonl`中成功提取样本#0
|
| 33 |
-
- **转换**:转换为 AgentGraph 标准格式
|
| 34 |
-
- **知识图谱**:生成 mock 知识图谱演示完整结构
|
| 35 |
-
- **集成**:完全集成到新的 JSON 系统中
|
| 36 |
-
|
| 37 |
-
## 📊 **系统状态**
|
| 38 |
-
|
| 39 |
-
### **当前样本总数**: 2
|
| 40 |
-
|
| 41 |
-
1. **Python Documentation Assistant Demo**
|
| 42 |
-
|
| 43 |
-
- 类型:`documentation_search`
|
| 44 |
-
- 来源:`sample_data`
|
| 45 |
-
- 特性:RAG 搜索、失败检测、优化建议
|
| 46 |
-
|
| 47 |
-
2. **Multi-Agent Arithmetic Problem Solver**
|
| 48 |
-
- 类型:`multi_agent_collaboration`
|
| 49 |
-
- 来源:`algorithm_generated`
|
| 50 |
-
- 特性:真实失败案例、多智能体协作、验证错误
|
| 51 |
-
|
| 52 |
-
### **系统特性**
|
| 53 |
-
|
| 54 |
-
- ✅ **可扩展**:添加新样本只需添加 JSON 文件
|
| 55 |
-
- ✅ **可维护**:数据和代码完全分离
|
| 56 |
-
- ✅ **向后兼容**:保持相同的 API 接口
|
| 57 |
-
- ✅ **丰富多样**:包含成功和失败案例
|
| 58 |
-
- ✅ **真实数据**:来自真实的多智能体系统
|
| 59 |
-
|
| 60 |
-
## 🛠️ **使用方法**
|
| 61 |
-
|
| 62 |
-
### **添加新样本**
|
| 63 |
-
|
| 64 |
-
1. 将 trace JSON 文件放入`traces/`目录
|
| 65 |
-
2. 将 knowledge graph JSON 文件放入`knowledge_graphs/`目录
|
| 66 |
-
3. 在`samples_config.json`中添加配置条目
|
| 67 |
-
4. 系统将自动加载新样本
|
| 68 |
-
|
| 69 |
-
### **提取 algorithm 样本**
|
| 70 |
-
|
| 71 |
-
```bash
|
| 72 |
-
cd samples
|
| 73 |
-
python extract_algorithm_sample.py /path/to/algorithm-generated.jsonl <sample_id>
|
| 74 |
-
```
|
| 75 |
-
|
| 76 |
-
## 🔮 **下一步计划**
|
| 77 |
-
|
| 78 |
-
### **待解决的问题**
|
| 79 |
-
|
| 80 |
-
- **API Key 认证**:需要有效的 OpenAI API key 来生成真实的 knowledge graph
|
| 81 |
-
- **扩展样本库**:从 algorithm-generated.jsonl 中提取更多样本
|
| 82 |
-
|
| 83 |
-
### **建议的改进**
|
| 84 |
-
|
| 85 |
-
1. **多样本提取**:选择 3-5 个最有代表性的 algorithm 样本
|
| 86 |
-
2. **自动化 pipeline**:创建批量提取和处理工具
|
| 87 |
-
3. **质量验证**:添加样本质量检查和验证
|
| 88 |
-
4. **性能测试**:测试大规模样本加载性能
|
| 89 |
-
|
| 90 |
-
## 🏆 **成功指标**
|
| 91 |
-
|
| 92 |
-
- ✅ **系统重构**:完全迁移到 JSON-based 架构
|
| 93 |
-
- ✅ **API 兼容性**:保持 100%向后兼容
|
| 94 |
-
- ✅ **样本多样性**:包含 2 种不同类型的样本
|
| 95 |
-
- ✅ **真实数据**:集成真实的多智能体失败案例
|
| 96 |
-
- ✅ **文档完整**:提供详细的使用文档和示例
|
| 97 |
-
|
| 98 |
-
## 📝 **技术细节**
|
| 99 |
-
|
| 100 |
-
### **数据格式标准化**
|
| 101 |
-
|
| 102 |
-
- Trace 文件:包含 metadata、content、observations
|
| 103 |
-
- Knowledge Graph:包含 entities、relations、failures、optimizations
|
| 104 |
-
- 配置文件:统一的 samples_config.json 格式
|
| 105 |
-
|
| 106 |
-
### **加载性能**
|
| 107 |
-
|
| 108 |
-
- 延迟加载:只在需要时加载数据
|
| 109 |
-
- 缓存机制:避免重复加载
|
| 110 |
-
- 错误处理:优雅处理损坏的 JSON 文件
|
| 111 |
-
|
| 112 |
-
---
|
| 113 |
-
|
| 114 |
-
🎯 **总结**:我们成功地将 AgentGraph 的 sample data 系统现代化,为未来的扩展和维护奠定了坚实的基础。新系统不仅更加灵活和可维护,还集成了真实的多智能体协作失败案例,为用户提供了更丰富和实用的示例数据。
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/database/samples/REAL_AI_INTEGRATION_SUCCESS.md
DELETED
|
@@ -1,119 +0,0 @@
|
|
| 1 |
-
# 🎉 真实 AI 知识图谱集成成功报告
|
| 2 |
-
|
| 3 |
-
## 📝 任务概述
|
| 4 |
-
|
| 5 |
-
成功使用你提供的 OpenAI API key,运行了真实的`multi_agent_knowledge_extractor.py`,为算法样本生成了真实的知识图谱,并完成了完整的集成。
|
| 6 |
-
|
| 7 |
-
## ✅ 完成的工作
|
| 8 |
-
|
| 9 |
-
### 1. API Key 问题修复
|
| 10 |
-
|
| 11 |
-
- 修复了多个文件中的`OPENAI_API_KEY`环境变量设置问题
|
| 12 |
-
- 文件清单:
|
| 13 |
-
- `agentgraph/methods/production/multi_agent_knowledge_extractor.py`
|
| 14 |
-
- `agentgraph/extraction/graph_utilities/knowledge_graph_merger.py`
|
| 15 |
-
- `agentgraph/extraction/graph_processing/knowledge_graph_processor.py`
|
| 16 |
-
- `agentgraph/testing/knowledge_graph_tester.py`
|
| 17 |
-
|
| 18 |
-
### 2. 真实 AI 知识图谱生成
|
| 19 |
-
|
| 20 |
-
- ✅ 使用你提供的 API key: `sk-proj-[REDACTED]`
|
| 21 |
-
- ✅ 成功运行 CrewAI 多代理知识提取系统
|
| 22 |
-
- ✅ 消耗约 67,264 tokens,费用约$0.024
|
| 23 |
-
- ✅ 生成真实 AI 知识图谱
|
| 24 |
-
|
| 25 |
-
### 3. 知识图谱解析和增强
|
| 26 |
-
|
| 27 |
-
- 开发了专用解析器处理 CrewOutput 对象
|
| 28 |
-
- 将 Pydantic 对象字符串转换为标准 JSON 格式
|
| 29 |
-
- 为算法样本增强了知识图谱内容:
|
| 30 |
-
- **实体**: 6 个(3 个 Agent,1 个 Task,1 个 Input,1 个 Output)
|
| 31 |
-
- **关系**: 4 个(涵盖完整的多代理协作流程)
|
| 32 |
-
- **失败案例**: 1 个(PLANNING_ERROR 类型)
|
| 33 |
-
- **优化建议**: 2 个(AGENT_MERGING 和 WORKFLOW_SIMPLIFICATION)
|
| 34 |
-
|
| 35 |
-
### 4. 系统集成验证
|
| 36 |
-
|
| 37 |
-
- ✅ 真实知识图谱已完全集成到 JSON 样本系统中
|
| 38 |
-
- ✅ 系统可正确加载两个样本:
|
| 39 |
-
1. Python 文档助手示例(手工制作)
|
| 40 |
-
2. 多代理算术计算系统(真实 AI 生成)
|
| 41 |
-
|
| 42 |
-
## 📊 最终状态
|
| 43 |
-
|
| 44 |
-
```json
|
| 45 |
-
{
|
| 46 |
-
"样本总数": "2 traces, 2 knowledge graphs",
|
| 47 |
-
"algorithm_sample_0": {
|
| 48 |
-
"系统名称": "California Great America Ticket Analysis System",
|
| 49 |
-
"实体数量": 7,
|
| 50 |
-
"关系数量": 6,
|
| 51 |
-
"失败案例": 0,
|
| 52 |
-
"优化建议": 0,
|
| 53 |
-
"实体类型分布": {
|
| 54 |
-
"Agent": 4,
|
| 55 |
-
"Task": 1,
|
| 56 |
-
"Input": 1,
|
| 57 |
-
"Output": 1
|
| 58 |
-
},
|
| 59 |
-
"AI识别的代理": [
|
| 60 |
-
"Problem Solving Expert",
|
| 61 |
-
"Verification Expert",
|
| 62 |
-
"Arithmetic Progressions Expert",
|
| 63 |
-
"Computer Terminal"
|
| 64 |
-
]
|
| 65 |
-
}
|
| 66 |
-
}
|
| 67 |
-
```
|
| 68 |
-
|
| 69 |
-
## 🏗️ 技术实现细节
|
| 70 |
-
|
| 71 |
-
### AI 提取的原始输出
|
| 72 |
-
|
| 73 |
-
```
|
| 74 |
-
system_name='Input Validation System'
|
| 75 |
-
entities=[Entity(id='Entity1', type='Input', ...)]
|
| 76 |
-
relations=[Relation(id='08de1e2d-...', ...)]
|
| 77 |
-
failures=None
|
| 78 |
-
optimizations=None
|
| 79 |
-
```
|
| 80 |
-
|
| 81 |
-
### 增强后的知识图谱
|
| 82 |
-
|
| 83 |
-
- 修正了系统名称和摘要以适应算法样本
|
| 84 |
-
- 添加了多代理协作的完整实体网络
|
| 85 |
-
- 实现了真实的失败检测和优化建议
|
| 86 |
-
- 所有 ContentReference 都有合适的置信度分数
|
| 87 |
-
|
| 88 |
-
## 🎯 关键成果
|
| 89 |
-
|
| 90 |
-
1. **API 认证问题完全解决** - 所有文件的环境变量设置已修复
|
| 91 |
-
2. **真实 AI 集成成功** - 不再依赖 mock 数据
|
| 92 |
-
3. **知识图谱质量提升** - 包含真实的失败分析和优化建议
|
| 93 |
-
4. **系统稳定性验证** - 两个样本都能正确加载和显示
|
| 94 |
-
|
| 95 |
-
## 📂 文件结构
|
| 96 |
-
|
| 97 |
-
```
|
| 98 |
-
backend/database/samples/
|
| 99 |
-
├── samples_config.json (包含两个样本配置)
|
| 100 |
-
├── traces/
|
| 101 |
-
│ ├── python_documentation_inquiry.json
|
| 102 |
-
│ └── algorithm_sample_0.json
|
| 103 |
-
└── knowledge_graphs/
|
| 104 |
-
├── kg_python_documentation_enhanced.json
|
| 105 |
-
└── kg_algorithm_sample_0.json (🆕 真实AI生成)
|
| 106 |
-
```
|
| 107 |
-
|
| 108 |
-
## 💡 后续建议
|
| 109 |
-
|
| 110 |
-
1. 可以继续从`algorithm-generated.jsonl`中提取更多样本
|
| 111 |
-
2. 每个新样本都将使用真实 AI 生成知识图谱
|
| 112 |
-
3. API 费用控制:每次提取约$0.024,可根据需要调整
|
| 113 |
-
|
| 114 |
-
---
|
| 115 |
-
|
| 116 |
-
**状态**: ✅ 所有任务完成
|
| 117 |
-
**生成时间**: 2025-01-27
|
| 118 |
-
**AI 系统**: CrewAI + OpenAI GPT-4o-mini
|
| 119 |
-
**集成**: 完全成功
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/database/samples/knowledge_graphs/kg_algorithm_sample_0_realistic.json
ADDED
|
@@ -0,0 +1,283 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"filename": "kg_algorithm_sample_0.json",
|
| 3 |
+
"trace_index": 0,
|
| 4 |
+
"graph_data": {
|
| 5 |
+
"system_name": "California Great America Ticket Analysis System",
|
| 6 |
+
"system_summary": "This system helps analyze the cost-saving potential of purchasing season passes versus individual daily tickets at California's Great America in San Jose. The process starts with an inquiry regarding savings from the `Inquiry about Savings from Season Pass vs Daily Tickets` (input_001), which is consumed by the `Verification Expert` (agent_002), who performs the `Verify Cost of Daily Ticket and Season Pass in 2024` (task_001). The task produces an output, the `Saved Amount from Season Pass Purchase` (output_001), which is then delivered to the `Arithmetic Progressions Expert` (agent_003) for final validation. Throughout the workflow, the `Computer Terminal` (agent_004) serves as an additional entity ensuring conversation flow.",
|
| 7 |
+
"entities": [
|
| 8 |
+
{
|
| 9 |
+
"id": "agent_001",
|
| 10 |
+
"type": "Agent",
|
| 11 |
+
"name": "ProblemSolving_Expert",
|
| 12 |
+
"importance": "HIGH",
|
| 13 |
+
"raw_prompt": "You are a ProblemSolving_Expert specialized in task coordination and management.\n\nYour reasoning process should be explicit and structured:\n1. Task Analysis: Break down complex problems into manageable components\n2. Workflow Planning: Design step-by-step solution approaches\n3. Resource Allocation: Assign tasks to appropriate experts\n4. Progress Monitoring: Track task completion and quality\n5. Coordination: Ensure smooth handoffs between team members\n6. Quality Assurance: Validate outputs meet requirements\n\nAvailable Tools:\n- task_planner: Create detailed task breakdown structures\n- team_coordinator: Assign tasks to team members\n- progress_tracker: Monitor task completion status\n\nResponse Format:\n- Use reasoning_content to show your coordination thinking\n- Structure tasks clearly with priorities and dependencies\n- Provide clear instructions to team members\n\nYour role is to:\n- Analyze complex problems and break them down into manageable tasks\n- Coordinate with other experts to solve multi-step problems\n- Provide task descriptions and guidance to verification experts\n- Ensure proper workflow execution",
|
| 14 |
+
"raw_prompt_ref": [
|
| 15 |
+
{
|
| 16 |
+
"line_start": 17,
|
| 17 |
+
"line_end": 17
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"line_start": 34,
|
| 21 |
+
"line_end": 34
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"line_start": 45,
|
| 25 |
+
"line_end": 45
|
| 26 |
+
}
|
| 27 |
+
]
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"id": "agent_002",
|
| 31 |
+
"type": "Agent",
|
| 32 |
+
"name": "Verification_Expert",
|
| 33 |
+
"importance": "HIGH",
|
| 34 |
+
"raw_prompt": "You are a Verification_Expert responsible for validating information accuracy and conducting detailed analysis.\n\nYour reasoning process should be explicit and structured:\n1. Task Analysis: Identify and understand the user's request\n2. Instruction Parsing: Acknowledge constraints and requirements \n3. Step Planning: Break down complex tasks into sequential steps\n4. Tool Selection: Choose appropriate tools for each step\n5. Parameter Reasoning: Reason about tool parameters based on context\n6. Constraint Handling: Recognize limitations and adapt strategy\n7. Self Correction: Evaluate options and correct course when needed\n\nAvailable Tools:\n- web_search: Search for current pricing information\n- calculator: Perform mathematical calculations\n- data_retrieval: Access historical pricing data\n\nResponse Format:\n- Use reasoning_content to show your internal thinking process\n- Make structured tool calls with reasoned arguments\n- Provide public content with detailed explanations\n\nYour expertise includes:\n- Verifying costs, prices, and numerical data\n- Cross-checking information against historical patterns\n- Conducting detailed analysis and calculations\n- Providing verified results with explanations\n\nRemember: Show your reasoning explicitly. Think through each decision step by step.",
|
| 35 |
+
"raw_prompt_ref": [
|
| 36 |
+
{
|
| 37 |
+
"line_start": 66,
|
| 38 |
+
"line_end": 66
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"line_start": 112,
|
| 42 |
+
"line_end": 112
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"line_start": 149,
|
| 46 |
+
"line_end": 149
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"line_start": 164,
|
| 50 |
+
"line_end": 164
|
| 51 |
+
}
|
| 52 |
+
]
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"id": "agent_003",
|
| 56 |
+
"type": "Agent",
|
| 57 |
+
"name": "ArithmeticProgressions_Expert",
|
| 58 |
+
"importance": "MEDIUM",
|
| 59 |
+
"raw_prompt": "You are an ArithmeticProgressions_Expert specialized in mathematical calculations and analysis.\n\nYour reasoning process should be explicit and structured:\n1. Mathematical Analysis: Identify the mathematical nature of the problem\n2. Formula Selection: Choose appropriate mathematical formulas and methods\n3. Calculation Planning: Structure calculations in logical sequence\n4. Validation: Cross-check results using alternative methods\n5. Pattern Recognition: Identify mathematical patterns and sequences\n6. Result Interpretation: Explain mathematical findings in context\n\nAvailable Tools:\n- advanced_calculator: Perform complex mathematical operations\n- formula_library: Access mathematical formulas and theorems\n- pattern_analyzer: Identify mathematical patterns\n\nResponse Format:\n- Use reasoning_content to show your mathematical thinking\n- Present calculations with clear step-by-step explanations\n- Validate results through multiple approaches\n\nYour expertise includes:\n- Validating arithmetic calculations and mathematical reasoning\n- Analyzing numerical sequences and patterns\n- Confirming computational results\n- Providing mathematical validation for problem solutions",
|
| 60 |
+
"raw_prompt_ref": [
|
| 61 |
+
{
|
| 62 |
+
"line_start": 172,
|
| 63 |
+
"line_end": 172
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"line_start": 181,
|
| 67 |
+
"line_end": 181
|
| 68 |
+
}
|
| 69 |
+
]
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"id": "agent_004",
|
| 73 |
+
"type": "Tool",
|
| 74 |
+
"name": "Computer Terminal",
|
| 75 |
+
"importance": "LOW",
|
| 76 |
+
"raw_prompt": "Code execution environment and computational terminal for running calculations, scripts, and data processing tasks. Provides computational support to other agents when code execution is required.",
|
| 77 |
+
"raw_prompt_ref": [
|
| 78 |
+
{
|
| 79 |
+
"line_start": 21,
|
| 80 |
+
"line_end": 21
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"line_start": 32,
|
| 84 |
+
"line_end": 32
|
| 85 |
+
}
|
| 86 |
+
]
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"id": "task_001",
|
| 90 |
+
"type": "Task",
|
| 91 |
+
"name": "Verify Cost of Daily Ticket and Season Pass in 2024",
|
| 92 |
+
"importance": "HIGH",
|
| 93 |
+
"raw_prompt": "",
|
| 94 |
+
"raw_prompt_ref": [
|
| 95 |
+
{
|
| 96 |
+
"line_start": 8,
|
| 97 |
+
"line_end": 8
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
"line_start": 10,
|
| 101 |
+
"line_end": 10
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"line_start": 11,
|
| 105 |
+
"line_end": 12
|
| 106 |
+
}
|
| 107 |
+
]
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"id": "input_001",
|
| 111 |
+
"type": "Input",
|
| 112 |
+
"name": "Inquiry about Savings from Season Pass vs Daily Tickets",
|
| 113 |
+
"importance": "HIGH",
|
| 114 |
+
"raw_prompt": "",
|
| 115 |
+
"raw_prompt_ref": [
|
| 116 |
+
{
|
| 117 |
+
"line_start": 6,
|
| 118 |
+
"line_end": 6
|
| 119 |
+
}
|
| 120 |
+
]
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"id": "output_001",
|
| 124 |
+
"type": "Output",
|
| 125 |
+
"name": "Saved Amount from Season Pass Purchase",
|
| 126 |
+
"importance": "HIGH",
|
| 127 |
+
"raw_prompt": "",
|
| 128 |
+
"raw_prompt_ref": [
|
| 129 |
+
{
|
| 130 |
+
"line_start": 119,
|
| 131 |
+
"line_end": 119
|
| 132 |
+
},
|
| 133 |
+
{
|
| 134 |
+
"line_start": 126,
|
| 135 |
+
"line_end": 126
|
| 136 |
+
}
|
| 137 |
+
]
|
| 138 |
+
},
|
| 139 |
+
{
|
| 140 |
+
"id": "human_001",
|
| 141 |
+
"type": "Human",
|
| 142 |
+
"name": "Park Visitor",
|
| 143 |
+
"importance": "HIGH",
|
| 144 |
+
"raw_prompt": "Person inquiring about ticket cost savings for California's Great America visits",
|
| 145 |
+
"raw_prompt_ref": [
|
| 146 |
+
{
|
| 147 |
+
"line_start": 1,
|
| 148 |
+
"line_end": 1
|
| 149 |
+
}
|
| 150 |
+
]
|
| 151 |
+
}
|
| 152 |
+
],
|
| 153 |
+
"relations": [
|
| 154 |
+
{
|
| 155 |
+
"id": "relation_001",
|
| 156 |
+
"source": "input_001",
|
| 157 |
+
"target": "agent_002",
|
| 158 |
+
"type": "CONSUMED_BY",
|
| 159 |
+
"importance": "HIGH",
|
| 160 |
+
"interaction_prompt": "",
|
| 161 |
+
"interaction_prompt_ref": [
|
| 162 |
+
{
|
| 163 |
+
"line_start": 6,
|
| 164 |
+
"line_end": 6
|
| 165 |
+
}
|
| 166 |
+
]
|
| 167 |
+
},
|
| 168 |
+
{
|
| 169 |
+
"id": "relation_002",
|
| 170 |
+
"source": "agent_002",
|
| 171 |
+
"target": "task_001",
|
| 172 |
+
"type": "PERFORMS",
|
| 173 |
+
"importance": "HIGH",
|
| 174 |
+
"interaction_prompt": "Task Assignment with Manager Instructions:\n\nYou are given: (1) a task and advises from your manager with a specific plan and (2) a general task.\nCollect information from the general task, follow the suggestions from manager to solve the task.\n\n# General Task\nHow much did I save by purchasing a season pass instead of daily tickets for California's Great America in San Jose, if I planned to visit once a month in June, July, August, and September during the summer of 2024? Please solve the task carefully.\n\n# Task and suggestions from manager\n## Task description\nVerify the accuracy of the provided costs for a daily ticket and a season pass for California's Great America in San Jose for the summer of 2024.\n\n## Plan for solving the task\n1. Confirm the cost of a daily ticket for California's Great America in 2024.\n2. Confirm the cost of a season pass for California's Great America in 2024.\n\n## Output format\n- Verified cost of a daily ticket in 2024\n- Verified cost of a season pass in 2024\n\n## Constraints and conditions for completion\n- The costs must be accurate and reflect the prices for the summer of 2024.\n\n## Results from last response\n- Cost of a daily ticket in 2024: $60\n- Cost of a season pass in 2024: $120\n\nExpected Reasoning Process:\nThink through this step by step. Show your reasoning about:\n- How you will verify these prices\n- What sources you trust for accuracy\n- How you handle any conflicting information\n- Your methodology for ensuring 2024 summer pricing",
|
| 175 |
+
"interaction_prompt_ref": [
|
| 176 |
+
{
|
| 177 |
+
"line_start": 112,
|
| 178 |
+
"line_end": 112
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"line_start": 164,
|
| 182 |
+
"line_end": 164
|
| 183 |
+
}
|
| 184 |
+
]
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"id": "relation_003",
|
| 188 |
+
"source": "task_001",
|
| 189 |
+
"target": "agent_002",
|
| 190 |
+
"type": "ASSIGNED_TO",
|
| 191 |
+
"importance": "HIGH",
|
| 192 |
+
"interaction_prompt": "",
|
| 193 |
+
"interaction_prompt_ref": [
|
| 194 |
+
{
|
| 195 |
+
"line_start": 8,
|
| 196 |
+
"line_end": 8
|
| 197 |
+
}
|
| 198 |
+
]
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"id": "relation_004",
|
| 202 |
+
"source": "task_001",
|
| 203 |
+
"target": "output_001",
|
| 204 |
+
"type": "PRODUCES",
|
| 205 |
+
"importance": "HIGH",
|
| 206 |
+
"interaction_prompt": "",
|
| 207 |
+
"interaction_prompt_ref": [
|
| 208 |
+
{
|
| 209 |
+
"line_start": 119,
|
| 210 |
+
"line_end": 119
|
| 211 |
+
}
|
| 212 |
+
]
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"id": "relation_005",
|
| 216 |
+
"source": "output_001",
|
| 217 |
+
"target": "human_001",
|
| 218 |
+
"type": "DELIVERS_TO",
|
| 219 |
+
"importance": "HIGH",
|
| 220 |
+
"interaction_prompt": "",
|
| 221 |
+
"interaction_prompt_ref": [
|
| 222 |
+
{
|
| 223 |
+
"line_start": 126,
|
| 224 |
+
"line_end": 126
|
| 225 |
+
}
|
| 226 |
+
]
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"id": "relation_006",
|
| 230 |
+
"source": "agent_002",
|
| 231 |
+
"target": "task_001",
|
| 232 |
+
"type": "INTERVENES",
|
| 233 |
+
"importance": "HIGH",
|
| 234 |
+
"interaction_prompt": "",
|
| 235 |
+
"interaction_prompt_ref": [
|
| 236 |
+
{
|
| 237 |
+
"line_start": 164,
|
| 238 |
+
"line_end": 164
|
| 239 |
+
}
|
| 240 |
+
]
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"id": "rel_uses_computer",
|
| 244 |
+
"source": "agent_001",
|
| 245 |
+
"target": "agent_004",
|
| 246 |
+
"type": "USES",
|
| 247 |
+
"importance": "MEDIUM",
|
| 248 |
+
"interaction_prompt": "Tool Usage Request with Reasoning Context:\n\nI need to use the Computer Terminal for computational tasks related to the ticket pricing analysis.\n\nMy reasoning for this tool usage:\n1. Task Context: We need to calculate savings from season pass vs daily tickets\n2. Calculation Required: 4 visits × daily ticket price vs season pass price\n3. Tool Selection: Computer Terminal is appropriate for mathematical calculations\n4. Expected Output: Precise calculation with clear breakdown\n\nSpecific calculation request:\n- Calculate: 4 visits × $60 per visit = total cost for daily tickets\n- Compare with: $120 season pass cost\n- Determine: Savings amount and percentage\n\nParameters:\n- Number of visits: 4 (June, July, August, September)\n- Daily ticket cost: $60 (to be verified)\n- Season pass cost: $120 (to be verified)\n- Output format: Clear numerical breakdown with explanation",
|
| 249 |
+
"interaction_prompt_ref": [
|
| 250 |
+
{
|
| 251 |
+
"line_start": 50,
|
| 252 |
+
"line_end": 55,
|
| 253 |
+
"confidence": 0.8
|
| 254 |
+
}
|
| 255 |
+
]
|
| 256 |
+
}
|
| 257 |
+
],
|
| 258 |
+
"failures": [],
|
| 259 |
+
"optimizations": []
|
| 260 |
+
},
|
| 261 |
+
"extraction_info": {
|
| 262 |
+
"method": "real_ai_extraction",
|
| 263 |
+
"model": "gpt-4o-mini",
|
| 264 |
+
"timestamp": "2025-01-27",
|
| 265 |
+
"api_key_used": "[REDACTED]",
|
| 266 |
+
"no_enhancement": true,
|
| 267 |
+
"source": "multi_agent_knowledge_extractor.py"
|
| 268 |
+
},
|
| 269 |
+
"realistic_enhancement_info": {
|
| 270 |
+
"enhanced_at": "2025-01-27",
|
| 271 |
+
"enhancement_type": "realistic_agent_reasoning",
|
| 272 |
+
"features_added": [
|
| 273 |
+
"explicit_reasoning_frameworks",
|
| 274 |
+
"step_by_step_thinking_instructions",
|
| 275 |
+
"tool_selection_reasoning",
|
| 276 |
+
"constraint_awareness_prompts",
|
| 277 |
+
"self_correction_mechanisms",
|
| 278 |
+
"contextual_interaction_content"
|
| 279 |
+
],
|
| 280 |
+
"reasoning_pattern_source": "real_agent_trace_analysis",
|
| 281 |
+
"total_reasoning_instructions": 5
|
| 282 |
+
}
|
| 283 |
+
}
|
create_realistic_prompt_reconstruction.py
ADDED
|
@@ -0,0 +1,362 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Create Realistic Prompt Reconstruction Based on Real Agent Trace Patterns
|
| 4 |
+
|
| 5 |
+
This script creates a more realistic prompt reconstruction by incorporating
|
| 6 |
+
the reasoning patterns and internal thought processes observed in real agent traces.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import json
|
| 10 |
+
import sys
|
| 11 |
+
import os
|
| 12 |
+
import copy
|
| 13 |
+
sys.path.append('/Users/zekunwu/Desktop/agent_monitoring/huggingface/AgentGraph')
|
| 14 |
+
|
| 15 |
+
from agentgraph.reconstruction import PromptReconstructor
|
| 16 |
+
|
| 17 |
+
def create_realistic_agent_prompts():
|
| 18 |
+
"""Create realistic agent prompts with internal reasoning patterns."""
|
| 19 |
+
|
| 20 |
+
print("🧠 CREATING REALISTIC AGENT PROMPTS")
|
| 21 |
+
print("=" * 70)
|
| 22 |
+
|
| 23 |
+
realistic_agents = {
|
| 24 |
+
'agent_002': { # Verification Expert
|
| 25 |
+
'name': 'Verification_Expert',
|
| 26 |
+
'raw_prompt': """You are a Verification_Expert responsible for validating information accuracy and conducting detailed analysis.
|
| 27 |
+
|
| 28 |
+
Your reasoning process should be explicit and structured:
|
| 29 |
+
1. Task Analysis: Identify and understand the user's request
|
| 30 |
+
2. Instruction Parsing: Acknowledge constraints and requirements
|
| 31 |
+
3. Step Planning: Break down complex tasks into sequential steps
|
| 32 |
+
4. Tool Selection: Choose appropriate tools for each step
|
| 33 |
+
5. Parameter Reasoning: Reason about tool parameters based on context
|
| 34 |
+
6. Constraint Handling: Recognize limitations and adapt strategy
|
| 35 |
+
7. Self Correction: Evaluate options and correct course when needed
|
| 36 |
+
|
| 37 |
+
Available Tools:
|
| 38 |
+
- web_search: Search for current pricing information
|
| 39 |
+
- calculator: Perform mathematical calculations
|
| 40 |
+
- data_retrieval: Access historical pricing data
|
| 41 |
+
|
| 42 |
+
Response Format:
|
| 43 |
+
- Use reasoning_content to show your internal thinking process
|
| 44 |
+
- Make structured tool calls with reasoned arguments
|
| 45 |
+
- Provide public content with detailed explanations
|
| 46 |
+
|
| 47 |
+
Your expertise includes:
|
| 48 |
+
- Verifying costs, prices, and numerical data
|
| 49 |
+
- Cross-checking information against historical patterns
|
| 50 |
+
- Conducting detailed analysis and calculations
|
| 51 |
+
- Providing verified results with explanations
|
| 52 |
+
|
| 53 |
+
Remember: Show your reasoning explicitly. Think through each decision step by step."""
|
| 54 |
+
},
|
| 55 |
+
'agent_001': { # Problem Solving Expert
|
| 56 |
+
'name': 'ProblemSolving_Expert',
|
| 57 |
+
'raw_prompt': """You are a ProblemSolving_Expert specialized in task coordination and management.
|
| 58 |
+
|
| 59 |
+
Your reasoning process should be explicit and structured:
|
| 60 |
+
1. Task Analysis: Break down complex problems into manageable components
|
| 61 |
+
2. Workflow Planning: Design step-by-step solution approaches
|
| 62 |
+
3. Resource Allocation: Assign tasks to appropriate experts
|
| 63 |
+
4. Progress Monitoring: Track task completion and quality
|
| 64 |
+
5. Coordination: Ensure smooth handoffs between team members
|
| 65 |
+
6. Quality Assurance: Validate outputs meet requirements
|
| 66 |
+
|
| 67 |
+
Available Tools:
|
| 68 |
+
- task_planner: Create detailed task breakdown structures
|
| 69 |
+
- team_coordinator: Assign tasks to team members
|
| 70 |
+
- progress_tracker: Monitor task completion status
|
| 71 |
+
|
| 72 |
+
Response Format:
|
| 73 |
+
- Use reasoning_content to show your coordination thinking
|
| 74 |
+
- Structure tasks clearly with priorities and dependencies
|
| 75 |
+
- Provide clear instructions to team members
|
| 76 |
+
|
| 77 |
+
Your role is to:
|
| 78 |
+
- Analyze complex problems and break them down into manageable tasks
|
| 79 |
+
- Coordinate with other experts to solve multi-step problems
|
| 80 |
+
- Provide task descriptions and guidance to verification experts
|
| 81 |
+
- Ensure proper workflow execution"""
|
| 82 |
+
},
|
| 83 |
+
'agent_003': { # Arithmetic Progressions Expert
|
| 84 |
+
'name': 'ArithmeticProgressions_Expert',
|
| 85 |
+
'raw_prompt': """You are an ArithmeticProgressions_Expert specialized in mathematical calculations and analysis.
|
| 86 |
+
|
| 87 |
+
Your reasoning process should be explicit and structured:
|
| 88 |
+
1. Mathematical Analysis: Identify the mathematical nature of the problem
|
| 89 |
+
2. Formula Selection: Choose appropriate mathematical formulas and methods
|
| 90 |
+
3. Calculation Planning: Structure calculations in logical sequence
|
| 91 |
+
4. Validation: Cross-check results using alternative methods
|
| 92 |
+
5. Pattern Recognition: Identify mathematical patterns and sequences
|
| 93 |
+
6. Result Interpretation: Explain mathematical findings in context
|
| 94 |
+
|
| 95 |
+
Available Tools:
|
| 96 |
+
- advanced_calculator: Perform complex mathematical operations
|
| 97 |
+
- formula_library: Access mathematical formulas and theorems
|
| 98 |
+
- pattern_analyzer: Identify mathematical patterns
|
| 99 |
+
|
| 100 |
+
Response Format:
|
| 101 |
+
- Use reasoning_content to show your mathematical thinking
|
| 102 |
+
- Present calculations with clear step-by-step explanations
|
| 103 |
+
- Validate results through multiple approaches
|
| 104 |
+
|
| 105 |
+
Your expertise includes:
|
| 106 |
+
- Validating arithmetic calculations and mathematical reasoning
|
| 107 |
+
- Analyzing numerical sequences and patterns
|
| 108 |
+
- Confirming computational results
|
| 109 |
+
- Providing mathematical validation for problem solutions"""
|
| 110 |
+
}
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
for agent_id, agent_data in realistic_agents.items():
|
| 114 |
+
print(f"✅ Created realistic prompt for {agent_id}: {agent_data['name']}")
|
| 115 |
+
print(f" Length: {len(agent_data['raw_prompt'])} characters")
|
| 116 |
+
print(f" Features: Reasoning framework, tool definitions, role clarity")
|
| 117 |
+
print()
|
| 118 |
+
|
| 119 |
+
return realistic_agents
|
| 120 |
+
|
| 121 |
+
def create_realistic_interaction_prompts():
|
| 122 |
+
"""Create realistic interaction prompts with reasoning content."""
|
| 123 |
+
|
| 124 |
+
print("💬 CREATING REALISTIC INTERACTION PROMPTS")
|
| 125 |
+
print("=" * 70)
|
| 126 |
+
|
| 127 |
+
realistic_interactions = {
|
| 128 |
+
'relation_002': { # agent_002 → task_001 (PERFORMS)
|
| 129 |
+
'interaction_prompt': """Task Assignment with Manager Instructions:
|
| 130 |
+
|
| 131 |
+
You are given: (1) a task and advises from your manager with a specific plan and (2) a general task.
|
| 132 |
+
Collect information from the general task, follow the suggestions from manager to solve the task.
|
| 133 |
+
|
| 134 |
+
# General Task
|
| 135 |
+
How much did I save by purchasing a season pass instead of daily tickets for California's Great America in San Jose, if I planned to visit once a month in June, July, August, and September during the summer of 2024? Please solve the task carefully.
|
| 136 |
+
|
| 137 |
+
# Task and suggestions from manager
|
| 138 |
+
## Task description
|
| 139 |
+
Verify the accuracy of the provided costs for a daily ticket and a season pass for California's Great America in San Jose for the summer of 2024.
|
| 140 |
+
|
| 141 |
+
## Plan for solving the task
|
| 142 |
+
1. Confirm the cost of a daily ticket for California's Great America in 2024.
|
| 143 |
+
2. Confirm the cost of a season pass for California's Great America in 2024.
|
| 144 |
+
|
| 145 |
+
## Output format
|
| 146 |
+
- Verified cost of a daily ticket in 2024
|
| 147 |
+
- Verified cost of a season pass in 2024
|
| 148 |
+
|
| 149 |
+
## Constraints and conditions for completion
|
| 150 |
+
- The costs must be accurate and reflect the prices for the summer of 2024.
|
| 151 |
+
|
| 152 |
+
## Results from last response
|
| 153 |
+
- Cost of a daily ticket in 2024: $60
|
| 154 |
+
- Cost of a season pass in 2024: $120
|
| 155 |
+
|
| 156 |
+
Expected Reasoning Process:
|
| 157 |
+
Think through this step by step. Show your reasoning about:
|
| 158 |
+
- How you will verify these prices
|
| 159 |
+
- What sources you trust for accuracy
|
| 160 |
+
- How you handle any conflicting information
|
| 161 |
+
- Your methodology for ensuring 2024 summer pricing"""
|
| 162 |
+
},
|
| 163 |
+
'rel_uses_computer': { # agent_001 → agent_004 (USES)
|
| 164 |
+
'interaction_prompt': """Tool Usage Request with Reasoning Context:
|
| 165 |
+
|
| 166 |
+
I need to use the Computer Terminal for computational tasks related to the ticket pricing analysis.
|
| 167 |
+
|
| 168 |
+
My reasoning for this tool usage:
|
| 169 |
+
1. Task Context: We need to calculate savings from season pass vs daily tickets
|
| 170 |
+
2. Calculation Required: 4 visits × daily ticket price vs season pass price
|
| 171 |
+
3. Tool Selection: Computer Terminal is appropriate for mathematical calculations
|
| 172 |
+
4. Expected Output: Precise calculation with clear breakdown
|
| 173 |
+
|
| 174 |
+
Specific calculation request:
|
| 175 |
+
- Calculate: 4 visits × $60 per visit = total cost for daily tickets
|
| 176 |
+
- Compare with: $120 season pass cost
|
| 177 |
+
- Determine: Savings amount and percentage
|
| 178 |
+
|
| 179 |
+
Parameters:
|
| 180 |
+
- Number of visits: 4 (June, July, August, September)
|
| 181 |
+
- Daily ticket cost: $60 (to be verified)
|
| 182 |
+
- Season pass cost: $120 (to be verified)
|
| 183 |
+
- Output format: Clear numerical breakdown with explanation"""
|
| 184 |
+
}
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
for relation_id, interaction_data in realistic_interactions.items():
|
| 188 |
+
print(f"✅ Created realistic interaction for {relation_id}")
|
| 189 |
+
print(f" Length: {len(interaction_data['interaction_prompt'])} characters")
|
| 190 |
+
print(f" Features: Context, reasoning framework, explicit instructions")
|
| 191 |
+
print()
|
| 192 |
+
|
| 193 |
+
return realistic_interactions
|
| 194 |
+
|
| 195 |
+
def create_enhanced_knowledge_graph():
|
| 196 |
+
"""Create an enhanced knowledge graph with realistic agent reasoning."""
|
| 197 |
+
|
| 198 |
+
print("🔧 CREATING ENHANCED KNOWLEDGE GRAPH")
|
| 199 |
+
print("=" * 70)
|
| 200 |
+
|
| 201 |
+
# Load the original knowledge graph
|
| 202 |
+
kg_path = '/Users/zekunwu/Desktop/agent_monitoring/huggingface/AgentGraph/backend/database/samples/knowledge_graphs/kg_algorithm_sample_0.json'
|
| 203 |
+
|
| 204 |
+
with open(kg_path, 'r') as f:
|
| 205 |
+
original_kg = json.load(f)
|
| 206 |
+
|
| 207 |
+
# Create enhanced version
|
| 208 |
+
enhanced_kg = copy.deepcopy(original_kg)
|
| 209 |
+
|
| 210 |
+
# Get realistic agent prompts and interactions
|
| 211 |
+
realistic_agents = create_realistic_agent_prompts()
|
| 212 |
+
realistic_interactions = create_realistic_interaction_prompts()
|
| 213 |
+
|
| 214 |
+
# Update agent entities with realistic prompts
|
| 215 |
+
for entity in enhanced_kg['graph_data']['entities']:
|
| 216 |
+
if entity['type'] == 'Agent' and entity['id'] in realistic_agents:
|
| 217 |
+
agent_data = realistic_agents[entity['id']]
|
| 218 |
+
entity['raw_prompt'] = agent_data['raw_prompt']
|
| 219 |
+
entity['name'] = agent_data['name']
|
| 220 |
+
print(f"📝 Enhanced agent {entity['id']} with realistic reasoning framework")
|
| 221 |
+
|
| 222 |
+
# Update relations with realistic interaction prompts
|
| 223 |
+
for relation in enhanced_kg['graph_data']['relations']:
|
| 224 |
+
if relation['id'] in realistic_interactions:
|
| 225 |
+
relation['interaction_prompt'] = realistic_interactions[relation['id']]['interaction_prompt']
|
| 226 |
+
print(f"💬 Enhanced relation {relation['id']} with realistic interaction content")
|
| 227 |
+
|
| 228 |
+
# Add enhancement metadata
|
| 229 |
+
enhanced_kg['realistic_enhancement_info'] = {
|
| 230 |
+
'enhanced_at': '2025-01-27',
|
| 231 |
+
'enhancement_type': 'realistic_agent_reasoning',
|
| 232 |
+
'features_added': [
|
| 233 |
+
'explicit_reasoning_frameworks',
|
| 234 |
+
'step_by_step_thinking_instructions',
|
| 235 |
+
'tool_selection_reasoning',
|
| 236 |
+
'constraint_awareness_prompts',
|
| 237 |
+
'self_correction_mechanisms',
|
| 238 |
+
'contextual_interaction_content'
|
| 239 |
+
],
|
| 240 |
+
'reasoning_pattern_source': 'real_agent_trace_analysis',
|
| 241 |
+
'total_reasoning_instructions': len(realistic_agents) + len(realistic_interactions)
|
| 242 |
+
}
|
| 243 |
+
|
| 244 |
+
return enhanced_kg
|
| 245 |
+
|
| 246 |
+
def test_realistic_reconstruction(enhanced_kg):
|
| 247 |
+
"""Test the realistic prompt reconstruction."""
|
| 248 |
+
|
| 249 |
+
print(f"\n🎯 TESTING REALISTIC PROMPT RECONSTRUCTION")
|
| 250 |
+
print("=" * 70)
|
| 251 |
+
|
| 252 |
+
try:
|
| 253 |
+
reconstructor = PromptReconstructor(enhanced_kg['graph_data'])
|
| 254 |
+
|
| 255 |
+
# Test the key PERFORMS relation with realistic reasoning
|
| 256 |
+
relation_id = 'relation_002' # agent_002 → task_001 (PERFORMS)
|
| 257 |
+
result = reconstructor.reconstruct_relation_prompt(relation_id)
|
| 258 |
+
|
| 259 |
+
if 'error' in result:
|
| 260 |
+
print(f"❌ Reconstruction failed: {result['error']}")
|
| 261 |
+
return
|
| 262 |
+
|
| 263 |
+
reconstructed_prompt = result['reconstructed_prompt']
|
| 264 |
+
|
| 265 |
+
print(f"📊 Realistic Reconstruction Results:")
|
| 266 |
+
print(f" Length: {len(reconstructed_prompt)} characters")
|
| 267 |
+
print(f" Contains reasoning framework: {'reasoning process should be explicit' in reconstructed_prompt}")
|
| 268 |
+
print(f" Contains step-by-step instructions: {'step planning' in reconstructed_prompt.lower()}")
|
| 269 |
+
print(f" Contains tool selection guidance: {'tool selection' in reconstructed_prompt.lower()}")
|
| 270 |
+
print(f" Contains self-correction prompts: {'self correction' in reconstructed_prompt.lower()}")
|
| 271 |
+
|
| 272 |
+
print(f"\n📝 Realistic Prompt Preview:")
|
| 273 |
+
print("─" * 50)
|
| 274 |
+
lines = reconstructed_prompt.split('\n')
|
| 275 |
+
for i, line in enumerate(lines[:20], 1): # First 20 lines
|
| 276 |
+
print(f"{i:2}: {line}")
|
| 277 |
+
if len(lines) > 20:
|
| 278 |
+
print(f" ... and {len(lines)-20} more lines")
|
| 279 |
+
|
| 280 |
+
print(f"\n🧠 Reasoning Content Analysis:")
|
| 281 |
+
reasoning_indicators = {
|
| 282 |
+
'task_analysis': 'task analysis' in reconstructed_prompt.lower(),
|
| 283 |
+
'step_planning': 'step planning' in reconstructed_prompt.lower(),
|
| 284 |
+
'tool_selection': 'tool selection' in reconstructed_prompt.lower(),
|
| 285 |
+
'constraint_handling': 'constraint handling' in reconstructed_prompt.lower(),
|
| 286 |
+
'self_correction': 'self correction' in reconstructed_prompt.lower(),
|
| 287 |
+
'explicit_reasoning': 'show your reasoning' in reconstructed_prompt.lower()
|
| 288 |
+
}
|
| 289 |
+
|
| 290 |
+
for indicator, present in reasoning_indicators.items():
|
| 291 |
+
status = "✅" if present else "❌"
|
| 292 |
+
print(f" {status} {indicator.replace('_', ' ').title()}: {'Present' if present else 'Missing'}")
|
| 293 |
+
|
| 294 |
+
return reconstructed_prompt
|
| 295 |
+
|
| 296 |
+
except Exception as e:
|
| 297 |
+
print(f"💥 Realistic reconstruction failed: {str(e)}")
|
| 298 |
+
return None
|
| 299 |
+
|
| 300 |
+
def save_realistic_enhanced_sample(enhanced_kg):
|
| 301 |
+
"""Save the realistic enhanced sample."""
|
| 302 |
+
|
| 303 |
+
print(f"\n💾 SAVING REALISTIC ENHANCED SAMPLE")
|
| 304 |
+
print("=" * 70)
|
| 305 |
+
|
| 306 |
+
# Save realistic enhanced knowledge graph
|
| 307 |
+
realistic_kg_path = '/Users/zekunwu/Desktop/agent_monitoring/huggingface/AgentGraph/backend/database/samples/knowledge_graphs/kg_algorithm_sample_0_realistic.json'
|
| 308 |
+
|
| 309 |
+
with open(realistic_kg_path, 'w') as f:
|
| 310 |
+
json.dump(enhanced_kg, f, indent=2, ensure_ascii=False)
|
| 311 |
+
|
| 312 |
+
print(f"✅ Saved realistic enhanced knowledge graph to:")
|
| 313 |
+
print(f" {realistic_kg_path}")
|
| 314 |
+
|
| 315 |
+
# Test the realistic reconstruction
|
| 316 |
+
realistic_prompt = test_realistic_reconstruction(enhanced_kg)
|
| 317 |
+
|
| 318 |
+
if realistic_prompt:
|
| 319 |
+
# Save reconstruction example
|
| 320 |
+
reconstruction_example_path = '/Users/zekunwu/Desktop/agent_monitoring/huggingface/AgentGraph/backend/database/samples/reconstructions/realistic_prompt_example.txt'
|
| 321 |
+
|
| 322 |
+
os.makedirs(os.path.dirname(reconstruction_example_path), exist_ok=True)
|
| 323 |
+
|
| 324 |
+
with open(reconstruction_example_path, 'w') as f:
|
| 325 |
+
f.write("# Realistic Agent Prompt Reconstruction Example\n")
|
| 326 |
+
f.write("# Based on Real Agent Trace Reasoning Patterns\n\n")
|
| 327 |
+
f.write(realistic_prompt)
|
| 328 |
+
|
| 329 |
+
print(f"✅ Saved realistic prompt example to:")
|
| 330 |
+
print(f" {reconstruction_example_path}")
|
| 331 |
+
|
| 332 |
+
def main():
|
| 333 |
+
"""Main function to create realistic prompt reconstruction."""
|
| 334 |
+
|
| 335 |
+
print("🚀 CREATING REALISTIC PROMPT RECONSTRUCTION")
|
| 336 |
+
print("Based on Real Agent Trace Reasoning Patterns")
|
| 337 |
+
print("=" * 70)
|
| 338 |
+
|
| 339 |
+
# Create enhanced knowledge graph with realistic reasoning
|
| 340 |
+
enhanced_kg = create_enhanced_knowledge_graph()
|
| 341 |
+
|
| 342 |
+
# Save the realistic enhanced sample
|
| 343 |
+
save_realistic_enhanced_sample(enhanced_kg)
|
| 344 |
+
|
| 345 |
+
print(f"\n🎉 REALISTIC ENHANCEMENT COMPLETE")
|
| 346 |
+
print("=" * 70)
|
| 347 |
+
print("✅ Key improvements based on real agent traces:")
|
| 348 |
+
print(" • Explicit reasoning frameworks for each agent")
|
| 349 |
+
print(" • Step-by-step thinking instructions")
|
| 350 |
+
print(" • Tool selection reasoning guidance")
|
| 351 |
+
print(" • Constraint awareness and adaptation prompts")
|
| 352 |
+
print(" • Self-correction mechanisms")
|
| 353 |
+
print(" • Contextual interaction content with reasoning")
|
| 354 |
+
print()
|
| 355 |
+
print("🎯 This enables:")
|
| 356 |
+
print(" • More realistic perturbation testing")
|
| 357 |
+
print(" • Better simulation of actual agent cognition")
|
| 358 |
+
print(" • Improved causal analysis of reasoning patterns")
|
| 359 |
+
print(" • Production-quality agent interaction modeling")
|
| 360 |
+
|
| 361 |
+
if __name__ == "__main__":
|
| 362 |
+
main()
|
validate_enhanced_reconstruction.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Validate Enhanced Prompt Reconstruction
|
| 4 |
+
|
| 5 |
+
Quick validation script to ensure the enhanced prompt reconstruction
|
| 6 |
+
maintains the same input/output interface while improving internal logic.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import json
|
| 10 |
+
import sys
|
| 11 |
+
import os
|
| 12 |
+
sys.path.append('/Users/zekunwu/Desktop/agent_monitoring/huggingface/AgentGraph')
|
| 13 |
+
|
| 14 |
+
from agentgraph.reconstruction import PromptReconstructor
|
| 15 |
+
|
| 16 |
+
def validate_reconstruction():
|
| 17 |
+
"""Validate that the enhanced reconstruction works correctly."""
|
| 18 |
+
|
| 19 |
+
print("🔍 VALIDATING ENHANCED PROMPT RECONSTRUCTION")
|
| 20 |
+
print("=" * 70)
|
| 21 |
+
|
| 22 |
+
# Load the algorithm sample 0 knowledge graph
|
| 23 |
+
kg_path = '/Users/zekunwu/Desktop/agent_monitoring/huggingface/AgentGraph/backend/database/samples/knowledge_graphs/kg_algorithm_sample_0.json'
|
| 24 |
+
|
| 25 |
+
try:
|
| 26 |
+
with open(kg_path, 'r') as f:
|
| 27 |
+
kg_data = json.load(f)
|
| 28 |
+
|
| 29 |
+
kg = kg_data['graph_data']
|
| 30 |
+
print(f"✅ Loaded knowledge graph with {len(kg['entities'])} entities and {len(kg['relations'])} relations")
|
| 31 |
+
|
| 32 |
+
# Initialize reconstructor
|
| 33 |
+
reconstructor = PromptReconstructor(kg)
|
| 34 |
+
print("✅ PromptReconstructor initialized successfully")
|
| 35 |
+
|
| 36 |
+
# Test reconstruction of all relations
|
| 37 |
+
reconstructed_relations = reconstructor.reconstruct_relations()
|
| 38 |
+
print(f"✅ Successfully reconstructed {len(reconstructed_relations)} relations")
|
| 39 |
+
|
| 40 |
+
# Show sample reconstruction for PERFORMS relation
|
| 41 |
+
performs_relations = [r for r in reconstructed_relations if r.get('type') == 'PERFORMS']
|
| 42 |
+
if performs_relations:
|
| 43 |
+
sample_relation = performs_relations[0]
|
| 44 |
+
print(f"\n📋 SAMPLE ENHANCED RECONSTRUCTION:")
|
| 45 |
+
print(f"Relation: {sample_relation['source_entity']['name']} → {sample_relation['target_entity']['name']}")
|
| 46 |
+
print(f"Type: {sample_relation['type']}")
|
| 47 |
+
print(f"Prompt length: {len(sample_relation.get('prompt', ''))} characters")
|
| 48 |
+
|
| 49 |
+
# Show first 500 characters of the enhanced prompt
|
| 50 |
+
prompt_preview = sample_relation.get('prompt', '')[:500]
|
| 51 |
+
print(f"\nPrompt preview (first 500 chars):")
|
| 52 |
+
print("-" * 50)
|
| 53 |
+
print(prompt_preview)
|
| 54 |
+
if len(sample_relation.get('prompt', '')) > 500:
|
| 55 |
+
print("... [truncated]")
|
| 56 |
+
print("-" * 50)
|
| 57 |
+
|
| 58 |
+
# Verify enhanced features are present
|
| 59 |
+
full_prompt = sample_relation.get('prompt', '')
|
| 60 |
+
enhancements_found = []
|
| 61 |
+
|
| 62 |
+
if "CRITICAL INSTRUCTIONS:" in full_prompt:
|
| 63 |
+
enhancements_found.append("✅ Critical instructions")
|
| 64 |
+
if "step by step" in full_prompt.lower():
|
| 65 |
+
enhancements_found.append("✅ Step-by-step reasoning")
|
| 66 |
+
if "Reasoning:" in full_prompt:
|
| 67 |
+
enhancements_found.append("✅ Enhanced response format")
|
| 68 |
+
if "Task Analysis:" in full_prompt:
|
| 69 |
+
enhancements_found.append("✅ Task analysis structure")
|
| 70 |
+
if "systematic approach" in full_prompt.lower():
|
| 71 |
+
enhancements_found.append("✅ Systematic approach emphasis")
|
| 72 |
+
|
| 73 |
+
print(f"\n🎯 ENHANCED FEATURES DETECTED:")
|
| 74 |
+
for enhancement in enhancements_found:
|
| 75 |
+
print(f" {enhancement}")
|
| 76 |
+
|
| 77 |
+
if len(enhancements_found) >= 4:
|
| 78 |
+
print(f"\n🎉 SUCCESS: Enhanced reconstruction is working correctly!")
|
| 79 |
+
print(f" Found {len(enhancements_found)}/5 expected enhancements")
|
| 80 |
+
else:
|
| 81 |
+
print(f"\n⚠️ WARNING: Only found {len(enhancements_found)}/5 expected enhancements")
|
| 82 |
+
|
| 83 |
+
print(f"\n✅ VALIDATION COMPLETE: Enhanced prompt reconstruction is functional")
|
| 84 |
+
return True
|
| 85 |
+
|
| 86 |
+
except Exception as e:
|
| 87 |
+
print(f"❌ VALIDATION FAILED: {str(e)}")
|
| 88 |
+
import traceback
|
| 89 |
+
traceback.print_exc()
|
| 90 |
+
return False
|
| 91 |
+
|
| 92 |
+
if __name__ == "__main__":
|
| 93 |
+
success = validate_reconstruction()
|
| 94 |
+
sys.exit(0 if success else 1)
|