# NOTE: non-source artifacts from a web file-viewer capture (status badges,
# byte size, commit hashes, and gutter line numbers) were removed here.
from typing import List
import os
from agentgraph.graph_generation.knowledge_graph_langsmith.knowledge_graph.component_types import (
Entity,
Relation,
KnowledgeGraph,
)
from pydantic import BaseModel
from typing import List
class EntityExtractionList(BaseModel):
    """Structured LLM response schema: all entities extracted from one trace window."""

    # Empty by default; pydantic copies field defaults per instance, so the
    # shared-mutable-default pitfall does not apply here.
    entities: List[Entity] = []
class RelationshipExtractionList(BaseModel):
    """Structured LLM response schema: all relations mapped between previously extracted entities."""

    # Empty by default; pydantic copies field defaults per instance.
    relations: List[Relation] = []
from agentgraph.graph_generation.knowledge_graph_langsmith.knowledge_graph.agent_base_utils import (
run_agent,
create_system_prompt,
create_task_prompt,
)
import json
async def entity_extractor(input_data, context_documents=None) -> EntityExtractionList:
    """Run an LLM agent that extracts entities from an agent-system trace.

    Builds a large instruction prompt that embeds ``input_data`` and the
    optional ``context_documents`` directly via f-string interpolation, then
    delegates execution to ``run_agent`` with ``EntityExtractionList`` as the
    structured response format. The prompt instructs the model to emit only
    *location references* (``raw_prompt_ref``) and keep ``raw_prompt`` empty.

    Args:
        input_data: The trace to analyze. Interpolated verbatim into the
            prompt — presumably a pre-numbered (``<L#>``-marked) trace string;
            TODO confirm the exact format with callers.
        context_documents: Optional supplementary context documents; when
            falsy, the prompt states "None provided." instead.

    Returns:
        EntityExtractionList: parsed structured output of the agent run
        (whatever ``run_agent`` returns for this response format).

    Raises:
        KeyError: if the ``OPENAI_MODEL_NAME`` environment variable is unset.
    """
    # Instruction prompt (extracted from task descriptions). Everything
    # between the triple quotes below is runtime prompt text sent to the
    # model; {{ / }} render as literal braces in the embedded JSON examples,
    # so do not edit this block casually.
    ENTITY_EXTRACTION_INSTRUCTION_PROMPT = f"""
Extract and categorize all entities from the provided agent system information using REFERENCE-BASED EXTRACTION as the primary method.
**CONTEXT DOCUMENTS AVAILABLE:**
The following context documents are available to enhance your understanding:
- {context_documents if context_documents else "None provided."}
**PRIMARY INPUT DATA:**
Here is the main trace you are analyzing:
- {input_data}
**CRITICAL: REFERENCE-ONLY EXTRACTION**
- You MUST leave the `raw_prompt` field as an empty string "" for ALL entities
- You MUST ONLY populate the `raw_prompt_ref` field with location references
- DO NOT extract or include the actual prompt content - only identify WHERE it is located
- The actual content will be extracted later by other functions using your references
**CONTEXT-ENHANCED EXTRACTION:**
Use the provided context documents to:
1. Better understand domain-specific terminology and concepts
2. Identify entities that might be domain-specific or technical
3. Recognize patterns and relationships specific to the business domain
4. Apply any provided schemas or guidelines for entity categorization
5. Reference examples to understand expected entity types and formats
**PROMPT DEFINITION**
A *prompt* is the exact text that will be injected into an LLM and which establishes the behaviour or definition of an entity (system / instruction / specification) or of a relation (interaction excerpt, format specification, etc.).
**CRITICAL MULTI-OCCURRENCE REQUIREMENT (read carefully)**
- The trace you receive is already numbered with `<L#>` markers.
- For EVERY distinct prompt you MUST enumerate *all* **contiguous occurrences** of that prompt text in the numbered trace.
- Represent each occurrence with exactly one `ContentReference` object whose `line_start` is the first `<L#>` line of the block and whose `line_end` is the last `<L#>` line of that same uninterrupted block (indented continuation-lines included).
- The `raw_prompt_ref` list length **must therefore equal** the number of separate occurrences (not the number of lines). Missing even **one** occurrence will fail validation.
- Overlap between the references of different entities is acceptable when prompts are truly shared.
- Tool definitions that begin with `@tool` ARE ALSO PROMPTS. Treat them exactly like other prompts: leave `raw_prompt` blank and add one `ContentReference` per occurrence.
Example (prompt appears twice across two blocks):
```json
{{
"id": "agent_001",
"type": "Agent",
"name": "Time Tracker Agent",
"raw_prompt": "", // ALWAYS EMPTY - DO NOT FILL
"raw_prompt_ref": [
{{"line_start": 3, "line_end": 3}},
{{"line_start": 9, "line_end": 9}}
]
}}
```
Tool-definition example (single occurrence with verification):
```json
{{
"id": "tool_001",
"type": "Tool",
"name": "zip_compress",
"raw_prompt": "", // ALWAYS EMPTY - DO NOT FILL
"raw_prompt_ref": [
{{"line_start": 15, "line_end": 15}}
]
}}
```
Verification process used:
- Located anchor text "@tool" and "zip_compress" in the input
- Counted from <L1> to find the exact <L15> marker
- Verified <L15> contains the complete tool definition
- **CRITICAL: raw_prompt left empty as required**
CORE PRINCIPLE: Each entity is defined by its DISTINCT PROMPT LOCATION, not by extracting the actual content.
This approach ensures:
- More robust and stable knowledge graphs across multiple traces
- Better entity distinction and relationship mapping
- Separation of reference identification from content extraction
- Reduced risk of content hallucination in entity extraction
Focus on identifying distinct prompt locations that define each entity type, as prompt references are the most reliable distinguishing factor for stable knowledge graphs.
CRITICAL ID FORMAT REQUIREMENT: Generate entity IDs using ONLY the format TYPE_SEQUENTIAL_NUMBER starting from 001.
Examples: "agent_001", "task_001", "human_001", "tool_001", "input_001", "output_001"
NEVER use names, emails, descriptions, or content as entity IDs.
INDENTATION RULE FOR CHUNKED LINES:
- When a single line from the original input is too long, it will be chunked into multiple lines.
- The first chunk will appear at the normal indentation level.
- All subsequent chunks of that same original line will be INDENTED with two spaces.
- This indentation is a visual cue that the indented lines are continuations of the preceding non-indented line.
LINE COUNTING METHODOLOGY (CRITICAL FOR ACCURACY):
Follow this systematic approach to avoid counting errors:
STEP 1 - CONTENT IDENTIFICATION:
- First, identify the exact content you need to reference
- Note distinctive words or phrases that will serve as anchors
- Determine if the content spans single or multiple lines
STEP 2 - ANCHOR-BASED POSITIONING:
- Find a unique phrase or pattern near the target content
- Search for that anchor text in the numbered input
- Use the anchor to locate the general area, then count precisely
STEP 3 - SYSTEMATIC LINE COUNTING:
- Count <L#> markers sequentially from a known reference point
- Do NOT skip or approximate - count every single <L#> marker
- Pay attention to indented continuation lines (they have their own <L#>)
- For long content, count in chunks and verify totals
STEP 4 - VERIFICATION:
- Double-check by counting backwards from a different reference point
- Verify the line_start contains the beginning of your target content
- Verify the line_end contains the end of your target content
- Ensure line_end >= line_start
COMMON COUNTING ERRORS TO AVOID:
- Skipping indented continuation lines that have <L#> markers
- Miscounting when jumping between distant parts of the input
- Confusing similar content in different locations
- Using approximate positions instead of exact <L#> marker counts
CONTENT REFERENCE INSTRUCTIONS:
- For each distinct prompt (regardless of length), you must find **ALL** occurrences in the input trace.
- The `raw_prompt_ref` field for an entity must be a **LIST** of `ContentReference` objects, one for each location where that prompt appears.
- Each `ContentReference` object should contain the `line_start` and `line_end` for that specific occurrence.
- **CRITICAL: You MUST NOT omit any occurrence. Be COMPREHENSIVE, not conservative. It's better to include more references than to miss any.**
- **For function-based tools: Include EVERY line where the function name appears (imports, calls, error messages, etc.)**
- **For agents: Include EVERY message or mention of the agent name**
- **For tasks: Include EVERY reference to the task or its components**
ACCURACY VERIFICATION CHECKLIST (complete before submitting):
✓ I have identified unique anchor text near each content location
✓ I have counted <L#> markers systematically, not approximately
✓ I have verified line_start contains the actual content beginning
✓ I have verified line_end contains the actual content ending
✓ I have double-checked my counting using a different reference point
Example (duplicate system prompt with verification):
```json
{{
"id": "agent_001",
"type": "Agent",
"name": "Time Tracker Agent",
"raw_prompt": "", // left blank per guidelines
"raw_prompt_ref": [
{{"line_start": 3, "line_end": 5}},
{{"line_start": 20, "line_end": 22}}
]
}}
```
In this example:
- The same system prompt occurs twice and both locations are captured
- Anchor text "Time Tracker Agent" was used to locate both occurrences
- Line counting was verified by counting from <L1> to each location
- Each occurrence was double-checked by counting backwards from <L25>
PROMPT-BASED ENTITY EXTRACTION RULES:
1. Agents (System Prompt Entities)
- Each DISTINCT system prompt defines a separate Agent entity
- Extract complete system prompts that define agent roles, capabilities, and behaviors
- raw_prompt MUST be an empty string "" (leave blank). Provide the actual system prompt via one or more `raw_prompt_ref` entries.
- Name should reflect the agent's role as defined in the system prompt
- Multiple agents with identical system prompts = single entity
2. Tasks (Instruction Prompt Entities)
- Each DISTINCT instruction prompt defines a separate Task entity
- Extract complete instruction prompts that define task objectives and requirements
- raw_prompt MUST be an empty string "" (leave blank). Provide the full instruction prompt via `raw_prompt_ref`.
- Name should reflect the task objective as defined in the instruction prompt
- Multiple tasks with identical instruction prompts = single entity
3. Tools (Description Prompt Entities)
- Each DISTINCT tool description/specification defines a separate Tool entity
- Extract complete tool descriptions including function signatures, parameters, and purpose
- raw_prompt MUST be an empty string "" (leave blank). Provide the full tool description/specification via `raw_prompt_ref`.
- Name should reflect the tool's function as defined in the description prompt
4. Inputs (Input Format Prompt Entities)
- Each DISTINCT input data format specification defines a separate Input entity
- Extract format specifications, schema definitions, or data structure descriptions
- raw_prompt MUST be an empty string "" (leave blank). Provide the full input format specification via `raw_prompt_ref`.
- Name should reflect the input data type as defined in the format specification
- Focus on data format prompts, not individual data values
- Examples: Database schema definitions, API request formats, file structure specifications
5. Outputs (Output Format Prompt Entities)
- Each DISTINCT output format specification defines a separate Output entity
- Extract format specifications for generated results, reports, or responses
- raw_prompt MUST be an empty string "" (leave blank). Provide the full output format specification via `raw_prompt_ref`.
- Name should reflect the output type as defined in the format specification
- Focus on output format prompts, not individual output values
- Examples: Report templates, response formats, file output specifications
6. Humans (Optional Prompt Entities)
- Each DISTINCT human interaction pattern defines a separate Human entity
- Extract interaction prompts that define human roles, feedback patterns, or intervention methods
- raw_prompt MUST be an empty string "" (leave blank). Provide the full interaction specification via `raw_prompt_ref`.
- Name should reflect the human role as defined in the interaction prompt (e.g., "Business Analyst", "Data Scientist")
- ID must follow format: "human_001", "human_002", etc. (NEVER use email addresses or actual names as IDs)
- Only create if there are explicit human interaction prompts or feedback specifications
- IMPORTANT: If you find email addresses like "skandha.tandra@unilever.com", put them in the name field, but use "human_001" as the ID
PROMPT-BASED ASSIGNMENT REQUIREMENTS:
- Assign unique IDs to all entities based on PROMPT UNIQUENESS, not names or descriptions
- Entities with IDENTICAL prompts = SINGLE entity (even if names differ)
- Entities with DIFFERENT prompts = SEPARATE entities (even if names are similar)
- Use only these entity types: "Agent", "Task", "Tool", "Input", "Output", "Human"
- Focus on extracting COMPLETE prompt REFERENCES that define each entity's behavior/specification
- Names should be derived from prompt content understanding, not abstract classifications
- **CRITICAL: The raw_prompt field MUST ALWAYS BE EMPTY - only raw_prompt_ref should be populated**
ENTITY ID GENERATION RULES (MANDATORY FORMAT):
- Use ONLY this format: TYPE_SEQUENTIAL_NUMBER (e.g., "agent_001", "task_001", "tool_001")
- Sequential numbering starts from 001 for each entity type
- NEVER use actual names, emails, or content as IDs
- Examples of CORRECT IDs:
* Agent entities: "agent_001", "agent_002", "agent_003"
* Task entities: "task_001", "task_002", "task_003"
* Tool entities: "tool_001", "tool_002", "tool_003"
* Input entities: "input_001", "input_002", "input_003"
* Output entities: "output_001", "output_002", "output_003"
* Human entities: "human_001", "human_002", "human_003"
- Examples of INCORRECT IDs:
* "skandha.tandra@unilever.com" (email address)
* "SQL Query Generator" (entity name)
* "Generate Spend Analysis Task" (entity description)
- CRITICAL: The relationship analyzer will use these exact ID values to create connections
**REFERENCE-ONLY EXTRACTION REQUIREMENTS:**
- **raw_prompt field**: MUST be empty string "" for ALL entities
- **raw_prompt_ref field**: MUST contain location references to where the prompt content appears
- **DO NOT extract actual content**: Your job is to identify locations, not extract text
- **Content will be extracted later**: Other functions will use your references to get actual content
Raw Prompt Reference Extraction (Identify locations of actual runtime prompts from agent system traces):
Identify the LOCATIONS of ACTUAL prompts, instructions, and configurations that were used during system execution.
Focus on finding the real runtime context locations, not generic descriptions.
AGENT ENTITIES - Extract complete agent definitions:
Look for agent framework patterns (CrewAI, LangChain, AutoGen, etc.) and extract:
- Complete role definitions: "role='Entity Extractor'" or "You are an Entity Extractor"
- Goal statements: "goal='Identify and categorize entities'"
- Backstory/context: Full backstory or system context provided to the agent
- System prompts: Any "system:" messages or agent initialization prompts
- Agent configurations: Model settings, temperature, max_tokens if present
CONVERSATIONAL AGENT DETECTION (CRITICAL FOR MULTI-AGENT TRACES):
In addition to explicit system prompts, also identify agents from conversational patterns:
1. AGENT NAME PATTERNS:
- Look for consistent agent names that appear as message senders (e.g., "ProblemSolving_Expert", "Verification_Expert")
- Agent names often contain role indicators: "_Expert", "_Agent", "_Assistant", "_Bot", "_terminal"
- Names with specialized domains: "ArithmeticProgressions_Expert", "Computer_terminal", "SQL_Agent"
2. CONVERSATIONAL AGENT INDICATORS:
- Messages from the same named entity across multiple interactions
- Specialized responses showing domain expertise (e.g., mathematical calculations, code execution, verification)
- Agent-to-agent communication patterns (addressing other agents by name)
- Consistent role behavior (e.g., always providing verification, always executing code)
3. AGENT IDENTIFICATION STRATEGY:
- Create ONE Agent entity per UNIQUE agent name that appears in conversations
- Use the agent's first substantial message as the raw_prompt_ref (their introduction or first meaningful contribution)
- If no explicit system prompt exists, use their first message that demonstrates their role/capabilities
- Name the entity based on their apparent role and domain expertise
4. EXAMPLES OF CONVERSATIONAL AGENTS:
- "ProblemSolving_Expert" → Agent entity for problem-solving expertise
- "Verification_Expert" → Agent entity for verification and validation
- "Computer_terminal" → Agent entity for code execution and system interaction
- "ArithmeticProgressions_Expert" → Agent entity for mathematical calculations
- "SQL_Agent" → Agent entity for database operations
5. AGENT ENTITY CREATION RULES FOR CONVERSATIONS:
- Each unique agent name = separate Agent entity
- **COMPREHENSIVE CONTENT REFERENCES: Include ALL messages from this agent, not just the first one**
- Include their introduction message, substantial contributions, and even status updates
- Be exhaustive: every line where the agent name appears or where they send a message
- Name should reflect their role: "ProblemSolving_Expert system prompt" → "Problem Solving Expert"
- Description should summarize their demonstrated capabilities in the conversation
TASK ENTITIES - Extract specific task instructions:
Look for actual task definitions and instructions:
- Task descriptions: Complete task objectives and requirements
- Input parameters: Specific data, queries, or context provided to the task
- Expected outputs: Defined output formats or requirements
- Task constraints: Limitations, rules, or guidelines
- Execution context: Timing, dependencies, or environmental factors
TOOL ENTITIES - CRITICAL: Extract ALL tools, especially function-based tools:
**MANDATORY DETECTION PATTERNS:**
1. Function imports: "from functions import perform_web_search" → Extract "perform_web_search" as Tool
2. Function calls: "perform_web_search(query, count=20)" → Extract "perform_web_search" as Tool
3. Function usage: "results = perform_web_search(...)" → Extract "perform_web_search" as Tool
4. Error mentions: "perform_web_search returned None" → Extract "perform_web_search" as Tool
**EXTRACTION REQUIREMENTS:**
- If you see "perform_web_search" ANYWHERE in the trace, you MUST extract it as a Tool entity
- If you see "from functions import [function_name]", extract [function_name] as Tool
- If you see "[function_name](" pattern, extract [function_name] as Tool
- Count usage frequency across all agents
- Determine importance based on usage frequency and failure impact
**COMPREHENSIVE CONTENT REFERENCE REQUIREMENTS FOR TOOLS:**
- Include EVERY line where the tool name appears (be exhaustive, not selective)
- Include import statements: "from functions import perform_web_search"
- Include function calls: "perform_web_search(query, count=20)"
- Include variable assignments: "results = perform_web_search(...)"
- Include error messages: "perform_web_search returned None"
- Include conditional statements: "if perform_web_search(query) is None"
- Include comments or documentation mentioning the tool
- Include any line containing the exact tool name, regardless of context
**TOOL ENTITY FIELDS:**
- name: The exact function name (e.g., "perform_web_search")
- description: Purpose inferred from usage context and parameters
- importance: HIGH if used by multiple agents or causes failures, MEDIUM if used frequently, LOW if used rarely
**DECORATOR-BASED TOOLS (@tool):**
- Tool signatures: Function names, parameters, return types
- Tool descriptions: Purpose and functionality explanations
- Usage examples: How the tool is called with specific parameters
- Tool configurations: Settings, API keys, endpoints (sanitized)
- Error handling: Retry logic, fallback mechanisms
HUMAN ENTITIES - Extract user interactions and feedback:
Capture complete human interactions:
- Original user queries: Full questions or requests
- Feedback statements: Corrections, approvals, or rejections
- Intervention commands: Direct instructions or overrides
- Context provided: Background information or clarifications
- Interaction timing: When feedback was provided
INPUT/OUTPUT ENTITIES - Extract data specifications:
For data entities, capture:
- Data schemas: Column names, types, constraints
- Query specifications: SQL queries, filters, conditions
- File formats: JSON structures, CSV headers, data types
- Business rules: Logic, calculations, or transformations
- Data sources: Database names, table names, API endpoints
EXTRACTION PATTERNS TO LOOK FOR:
1. Agent Framework Patterns:
- CrewAI: "Agent(role=..., goal=..., backstory=...)"
- LangChain: "SystemMessage(content=...)"
- AutoGen: "ConversableAgent(name=..., system_message=...)"
1b. Conversational Agent Patterns:
- Named message senders: "ProblemSolving_Expert (assistant): [message content]"
- Agent role indicators: "Verification_Expert", "Computer_terminal", "ArithmeticProgressions_Expert"
- Multi-agent conversations: agents addressing each other by name
- Specialized responses: mathematical calculations, code execution, domain expertise
- Agent introductions: "You are given: (1) a task..." or "To solve the task..."
2. Task Patterns:
- "Task(description=..., expected_output=...)"
- "Please [action] with [parameters]"
- "Your task is to [objective]"
3. Tool Patterns:
- "@tool" decorators with function definitions
- "Action: [tool_name]" with "Action Input: [parameters]"
- API calls with endpoints and parameters
- Function imports: "from [module] import [function_name]"
- Function calls: "[function_name]([parameters])" with multiple usage instances
- Module function calls: "[module].[function_name]([parameters])"
- Utility functions used across multiple agents or contexts
4. Human Interaction Patterns:
- Direct user messages or queries
- Feedback like "That's not correct, try again"
- Approvals like "Yes, proceed with this approach"
FORMATTING REQUIREMENTS:
- Preserve original formatting, indentation, and structure when possible
- Use triple quotes for multi-line prompts
- Include parameter names and types for tools
- Maintain JSON/YAML structure for configurations
- Sanitize sensitive information (API keys, passwords) but keep structure
Examples (showing actual runtime extraction):
```
# Agent prompt example (CrewAI)
Agent(
role='SQL Query Generator',
goal='Generate accurate Databricks SQL queries based on business requirements',
backstory='You are an expert SQL developer specializing in Databricks SQL Warehouse. You understand complex business logic and can translate natural language requirements into efficient SQL queries.',
llm='gpt-5-mini'
)
```
```
# Task prompt example
Task(
description='Generate a SQL query to compare spend and supplier count for fatty alcohol purchases between 2023 and 2024. Include filters for plant exclusions and intercompany indicators.',
expected_output='A complete SQL query with proper joins, filters, and aggregations that can be executed in Databricks SQL Warehouse'
)
```
```
# Tool prompt example (@tool decorator)
@tool
def databricks_sql_executor(query: str, warehouse_id: str) -> dict:
\"\"\"Execute SQL queries in Databricks SQL Warehouse
Args:
query: SQL query string to execute
warehouse_id: Databricks warehouse identifier
Returns:
Dictionary with query results and metadata
\"\"\"
```
```
# COMPREHENSIVE TOOL EXTRACTION EXAMPLE
# ALL these lines should be included in raw_prompt_ref for "perform_web_search":
# Line 45: from functions import perform_web_search
# Line 67: results = perform_web_search(query="machine learning trends", count=20)
# Line 89: search_results = perform_web_search(query="AI applications", count=15)
# Line 102: if perform_web_search(query) is None:
# Line 156: logger.error("perform_web_search returned None")
# Line 203: # Using perform_web_search for data retrieval
# Line 234: except Exception as e: # perform_web_search failed
# RESULT: Extract ALL 7 occurrences as ContentReference objects
{{
"id": "tool_001",
"type": "Tool",
"name": "perform_web_search",
"raw_prompt_ref": [
{{"line_start": 45, "line_end": 45}}, # import statement
{{"line_start": 67, "line_end": 67}}, # first function call
{{"line_start": 89, "line_end": 89}}, # second function call
{{"line_start": 102, "line_end": 102}}, # conditional check
{{"line_start": 156, "line_end": 156}}, # error message
{{"line_start": 203, "line_end": 203}}, # comment mention
{{"line_start": 234, "line_end": 234}} # exception comment
]
}}
```
```
# Human prompt example
Can you compare the spend and SupplierName count on PurchaseCommodityName fatty alcohol for 2023 and 2024 and share insights? I need this for the quarterly business review.
```
IMPORTANCE ASSESSMENT REQUIREMENTS:
For each entity, you MUST assign an importance level based on its role in the system:
HIGH IMPORTANCE:
- Core agents that coordinate or manage other agents
- Critical tasks that are essential for system function or user goals
- Essential tools that multiple agents depend on (e.g., perform_web_search used by multiple agents)
- Function-based tools with frequent usage across the workflow
- Primary inputs that drive the entire workflow
- Final outputs that represent the main system deliverables
- Key human stakeholders who make critical decisions
MEDIUM IMPORTANCE:
- Supporting agents with specialized but non-critical functions
- Standard operational tasks that support the main workflow
- Commonly used tools that enhance functionality (e.g., utility functions used occasionally)
- Function-based tools with moderate usage frequency
- Secondary inputs that provide additional context
- Intermediate outputs that feed into other processes
- Regular human users who provide routine input
LOW IMPORTANCE:
- Auxiliary agents with very specific or rare functions
- Simple tasks with minimal impact on overall system success
- Rarely used tools or utilities (e.g., debugging functions used once)
- Function-based tools with single or infrequent usage
- Optional inputs that provide minor enhancements
- Diagnostic or logging outputs
- Occasional human observers or reviewers
ASSESSMENT GUIDELINES:
- Consider the entity's centrality in the workflow
- Evaluate how many other entities depend on this one
- Assess the impact if this entity failed or was removed
- Look at frequency and criticality of usage patterns
- Consider whether the entity is replaceable or unique
- For function-based tools: Count usage frequency and cross-agent dependencies
"""
    # Static persona/configuration for the extraction agent
    # (CrewAI-style role/goal/backstory fields consumed by the helpers below).
    entity_extractor_agent_info = {
        "role": "Entity Extractor",
        "goal": "Extract all entities with proper types, importance levels, and raw prompts from agent trace data",
        "backstory": """You specialize in identifying entities within various data sources. You can recognize agent names,
tools, tasks, and other important elements in logs, documentation, model cards, or natural language descriptions.
You're particularly skilled at extracting model information, parameters, and performance metrics when available.
You create concise, informative one-sentence descriptions for every entity you identify, capturing its core purpose
or function in a way that helps others understand its role in the system.
Your expertise helps create comprehensive knowledge graphs by ensuring all relevant entities are properly identified,
categorized, and described. You focus on detail and ensure nothing important is missed, regardless of the format of the input data.""",
        "task_expected_output": "A list of properly structured entities with types, importance levels, and prompts",
    }
    # Compose the system prompt from the persona fields above.
    system_prompt = create_system_prompt(
        role=entity_extractor_agent_info["role"],
        backstory=entity_extractor_agent_info["backstory"],
        goal=entity_extractor_agent_info["goal"],
    )
    # Combine the instruction prompt with the expected-output contract and the
    # structured response schema; this is the first step of the pipeline, so
    # there is no previous step output.
    task_description = create_task_prompt(
        task_description=ENTITY_EXTRACTION_INSTRUCTION_PROMPT,
        task_expected_output=entity_extractor_agent_info["task_expected_output"],
        response_format=EntityExtractionList,
        previous_step_output=None,
    )
    # Execute the agent. Model name is read from the environment and raises
    # KeyError when OPENAI_MODEL_NAME is not set.
    return await run_agent(
        model_name=os.environ["OPENAI_MODEL_NAME"],
        system_prompt=system_prompt,
        task_description=task_description,
        response_format=EntityExtractionList,
    )
async def relationship_analyzer(input_data, previous_step_output, context_documents="") -> RelationshipExtractionList:
    """Second pipeline stage: map relationships between previously extracted entities.

    Builds a large instruction prompt (constrained to the ten predefined
    relationship types), wraps it with the agent persona, and dispatches it to
    the LLM via ``run_agent``.

    Args:
        input_data: The trace window being analyzed; interpolated directly into
            the instruction prompt.
        previous_step_output: Output of the entity-extraction step (expected to
            be a JSON string listing entities with their ids); forwarded into
            the task prompt so the model can reference exact entity ids.
        context_documents: Optional supplementary domain documents, interpolated
            into the prompt (empty string when absent).

    Returns:
        RelationshipExtractionList: structured relationships whose source/target
        fields reference entity ids from the previous step.
    """
    # NOTE: doubled braces ({{ }}) inside this f-string are escapes so the JSON
    # examples render with single braces in the final prompt text.
    RELATION_EXTRACTION_INSTRUCTION_PROMPT = f"""Map all relationships between system entities (identified in the previous step from this window) using ONLY the predefined relationship types.
Your analysis should focus on interactions described *within this specific window* of a larger chronological trace.
**CONTEXT DOCUMENTS AVAILABLE:**
The following context documents are available to enhance your understanding:
- {context_documents}
**PRIMARY INPUT DATA:**
Here is the main trace you are analyzing:
- {input_data}
MANDATORY: You MUST reference the exact entity list from the previous step with their IDs.
Every entity will have an ID in the format: TYPE_NUMBER (e.g., "agent_001", "human_001", "task_001")
You can ONLY use these exact IDs in your relationship source and target fields.
**CONTEXT-ENHANCED RELATIONSHIP ANALYSIS:**
Use the provided context documents to:
1. Better understand domain-specific workflows and processes
2. Identify standard relationship patterns in the business domain
3. Apply any provided guidelines for relationship categorization
4. Reference examples to understand expected relationship types
5. Recognize technical dependencies and data flows specific to the domain
Identify these 10 relationship types:
1. CONSUMED_BY: Input is processed by Agent
2. PERFORMS: Agent executes Task (focus on actual execution)
3. ASSIGNED_TO: Task delegated to Agent (focus on responsibility)
4. USES: Agent utilizes Tool
5. REQUIRED_BY: Tool is needed by Task
6. SUBTASK_OF: Task is component of parent Task
7. NEXT: Task follows another Task sequentially
8. PRODUCES: Task generates Output
9. DELIVERS_TO: Output is delivered to Human
10. INTERVENES: Agent/Human corrects Task
Critical distinctions:
- CONSUMED_BY: Input→Agent = data processing
- PERFORMS: Agent→Task = actual execution
- ASSIGNED_TO: Task→Agent = responsibility assignment
- DELIVERS_TO: Output→Human = final delivery
- INTERVENES: Agent/Human→Task = active correction/override
RELATIONSHIP EXTRACTION GUIDELINES:
When identifying relationships, be careful to ONLY map connections between actual entities:
1. DO NOT create these relationships:
- Between framework containers (e.g., "Crew", "Pipeline") and other entities
- Using execution IDs or session identifiers as entities
- Between status indicators and actual entities
- Between log formatting elements and actual entities
2. DO create relationships between:
- Actual named agents (e.g., "Organizer", "Thinker") and their tasks
- Agents and the specific tools they use
- Tasks and the tools they require
- Tasks that have sequential or hierarchical dependencies
- Entities and the actual inputs/outputs they consume/produce
- Human participants and the entities they review/modify
3. For agent frameworks:
- The framework container (e.g., "Crew", "Pipeline") is NOT an entity and should NOT have relationships
- Task IDs should be replaced with actual task names/descriptions in relationships
- Focus on the meaningful operational relationships, not the framework structure
EXAMPLE:
In a log entry like:
"🚀 Crew: crew
└── 📋 Task: abc-123 (Generate creative text)
Status: Executing Task...
└── 🤖 Agent: Researcher
Status: In Progress"
CORRECT relationship (if "Generate creative text" is an identified Task entity and "Researcher" an Agent entity):
- "Researcher PERFORMS Generate creative text"
INCORRECT relationships:
- "crew PERFORMS abc-123" (framework container to task ID, unless 'crew' is a defined entity and interacts)
- "Researcher PERFORMS abc-123" (using task ID instead of description from entity list)
For each relationship:
- CRITICAL: Use the exact entity.id field values (NOT entity.name) for source and target fields
- Source field must contain the exact ID of an entity from the extracted entities list
- Target field must contain the exact ID of an entity from the extracted entities list
- Clearly define the relationship type and its directionality (source → relationship → target)
- Populate interaction_prompt according to the prompt-based requirements above
- VALIDATION: Every source and target ID MUST correspond to an existing entity.id in the entities list
INTERACTION-BASED interaction_prompt content requirements:
- For CONSUMED_BY: Extract the ACTUAL DATA CONSUMPTION MESSAGE/LOG showing how the agent processed the input data
- For PERFORMS: Extract the ACTUAL EXECUTION MESSAGE/LOG showing the agent starting or executing the task
- For ASSIGNED_TO: Extract the ACTUAL ASSIGNMENT MESSAGE/LOG showing the task being delegated to the agent
- For USES: Extract the ACTUAL TOOL USAGE MESSAGE/LOG showing the agent calling or using the tool
- For REQUIRED_BY: Extract the ACTUAL REQUIREMENT MESSAGE/LOG showing the task needing or requesting the tool
- For SUBTASK_OF: Extract the ACTUAL HIERARCHICAL MESSAGE/LOG showing the parent-child task relationship
- For NEXT: Extract the ACTUAL SEQUENCE MESSAGE/LOG showing one task following another
- For PRODUCES: Extract the ACTUAL OUTPUT GENERATION MESSAGE/LOG showing the task creating the output
- For DELIVERS_TO: Extract the ACTUAL DELIVERY MESSAGE/LOG showing the output being sent to the human
- For INTERVENES: Extract the ACTUAL INTERVENTION MESSAGE/LOG showing the human/agent correcting the task
**CRITICAL: REFERENCE-ONLY INTERACTION EXTRACTION**
- You MUST leave the `interaction_prompt` field as an empty string "" for ALL relationships
- You MUST ONLY populate the `interaction_prompt_ref` field with location references to runtime interaction evidence
- DO NOT extract or include the actual interaction content - only identify WHERE it is located
- The actual interaction content will be extracted later by other functions using your references
- When you find interaction evidence you MUST enumerate every **contiguous occurrence** of that interaction text in the numbered trace and include one `ContentReference` object per occurrence in the `interaction_prompt_ref` list
- interaction_prompt_ref points to WHERE in the trace this specific interaction occurred (not static definitions)
- If no explicit interaction evidence exists in the trace, set interaction_prompt="" and interaction_prompt_ref=[]
Example with reference-only interaction:
```json
{{
"type": "USES",
"source": "agent_001",
"target": "tool_001",
"interaction_prompt": "", // ALWAYS EMPTY - DO NOT FILL
"interaction_prompt_ref": [
{{ "line_start": 120, "line_end": 120 }},
{{ "line_start": 250, "line_end": 250 }}
]
}}
```
Entity type constraints (STRICT):
- CONSUMED_BY: Input→Agent
- PERFORMS: Agent→Task
- ASSIGNED_TO: Task→Agent
- USES: Agent→Tool
- REQUIRED_BY: Tool→Task
- SUBTASK_OF: Task→Task
- NEXT: Task→Task
- PRODUCES: Task→Output (only Task can produce Output)
- DELIVERS_TO: Output→Human
- INTERVENES: Agent/Human→Task (either Agent or Human can intervene in tasks)
Data flow analysis:
- For CONSUMED_BY: Track explicit and implicit inputs, consumption patterns by agents
- For PRODUCES: Track artifacts, intermediate and final outputs from tasks
- For DELIVERS_TO: Track final delivery of outputs to humans
- Identify data transformations and potential failure points
CRITICAL ID MATCHING REQUIREMENT:
- Use ONLY the exact entity.id values in source and target fields
- DO NOT use entity.name values in source/target fields
- Every relationship source/target must reference an existing entity.id
- Example: If entity has id="agent_001" and name="SQL Query Generator", use "agent_001" in relationships
- VALIDATION: Check that every source and target ID exists in the entities list before creating the relationship
Connection requirements:
Every entity MUST connect to at least one other entity. For disconnected entities:
- Agents: Create PERFORMS, CONSUMED_BY, or logical connection based on role
- Tasks: Must have PERFORMS or ASSIGNED_TO, and typically PRODUCES
- Tools: Must have USES or REQUIRED_BY
- Inputs: Must be connected via CONSUMED_BY to at least one agent
- Outputs: Must be produced by at least one task via PRODUCES, and may be delivered via DELIVERS_TO
- Humans: Connect via DELIVERS_TO or INTERVENES
If no obvious connection exists, create a logical CONSUMED_BY or PRODUCES relationship at minimum.
Interaction Prompt Extraction (Capture actual runtime interaction details):
Extract SPECIFIC interaction details that show HOW entities actually interacted during execution.
Focus on real execution context, timing, parameters, and outcomes.
PERFORMS Relationships (Agent→Task):
Extract the actual execution details:
- Task assignment: "Agent X assigned to execute Task Y at timestamp Z"
- Execution parameters: Specific inputs, configurations, constraints provided
- Execution context: Environmental conditions, dependencies, prerequisites
- Progress indicators: Status updates, intermediate results, completion signals
- Performance metrics: Timing, resource usage, success/failure indicators
USES Relationships (Agent→Tool):
Extract specific tool usage details:
- Tool invocation: Exact tool calls with parameters and context
- Usage purpose: Why the tool was needed at this specific moment
- Input/output: Specific data passed to tool and results received
- Usage patterns: Frequency, timing, conditional usage
- Error handling: Retry attempts, fallback mechanisms, error recovery
ASSIGNED_TO Relationships (Task→Agent):
Extract delegation and assignment details:
- Assignment reason: Why this specific agent was chosen for this task
- Delegation context: Who assigned, when, under what conditions
- Responsibility scope: Specific aspects of the task assigned
- Authority level: Decision-making power, escalation procedures
- Success criteria: How completion/success will be measured
CONSUMED_BY Relationships (Input→Agent):
Extract data consumption details:
- Data source: Specific input location, format, access method
- Consumption pattern: How much, how often, under what conditions
- Processing method: Transformation, validation, filtering applied by agent
- Data dependencies: Required data quality, completeness, timeliness
- Consumption triggers: Events or conditions that initiate consumption
PRODUCES Relationships (Task→Output):
Extract output generation details:
- Output specification: Exact format, structure, content requirements
- Generation process: Steps, transformations, calculations performed
- Quality control: Validation, verification, approval processes
- Delivery method: How output is provided, stored, or transmitted
- Output dependencies: Prerequisites, inputs required for generation
DELIVERS_TO/INTERVENES Relationships (Output→Human, Agent/Human→Task):
Extract human interaction details:
- Delivery method: How output reaches human (email, dashboard, report, etc.)
- Delivery criteria: When and under what conditions output is delivered
- Intervention triggers: Conditions that prompted human/agent involvement
- Feedback specifics: Exact corrections, suggestions, approvals given
- Timing context: When delivery/intervention occurred in the process
- Impact assessment: How the delivery/intervention changed the outcome
EXTRACTION PATTERNS TO LOOK FOR:
1. Execution Logs:
- "Agent X started Task Y with parameters {{...}}"
- "Tool Z called with input {{...}} returned {{...}}"
- "Task completed in X seconds with status Y"
2. Delegation Patterns:
- "Assigning Task X to Agent Y because of expertise in Z"
- "Agent Y selected for Task X due to availability and skills"
3. Data Flow Patterns:
- "Processing input data from source X with filters Y"
- "Generated output file Z with format Y containing X records"
4. Human Interaction Patterns:
- "User provided feedback: 'This needs more detail'"
- "Human approval received for proceeding with approach X"
5. Tool Usage Patterns:
- "Executing SQL query on database X with timeout Y"
- "API call to service X with parameters Y returned status Z"
FORMATTING REQUIREMENTS:
- Include timestamps when available
- Preserve parameter names and values
- Include status codes, error messages, success indicators
- Maintain data format specifications
- Show actual values, not generic placeholders
RELATIONSHIP ID MATCHING EXAMPLES:
Given these entities from the previous step:
- Entity 1: {{id: "input_001", name: "Spend Database Schema", type: "Input"}}
- Entity 2: {{id: "agent_001", name: "SQL Query Generator", type: "Agent"}}
- Entity 3: {{id: "task_001", name: "Generate Spend Analysis", type: "Task"}}
- Entity 4: {{id: "output_001", name: "Analysis Report", type: "Output"}}
- Entity 5: {{id: "human_001", name: "Business Analyst", type: "Human"}}
CORRECT relationships:
```
{{
source: "input_001", // Use exact entity.id from entity list
target: "agent_001", // Use exact entity.id from entity list
type: "CONSUMED_BY"
}}
{{
source: "agent_001",
target: "task_001",
type: "PERFORMS"
}}
{{
source: "output_001",
target: "human_001", // Use "human_001", NOT "skandha.tandra@unilever.com"
type: "DELIVERS_TO"
}}
```
INCORRECT relationships (will cause graph errors):
```
{{
source: "Spend Database Schema", // WRONG: using entity.name
target: "SQL Query Generator", // WRONG: using entity.name
type: "CONSUMED_BY"
}}
{{
source: "output_001",
target: "skandha.tandra@unilever.com", // WRONG: using email/content, not entity.id
type: "DELIVERS_TO"
}}
```
COMPLETE REFERENCE-ONLY Examples with interaction_prompt_ref:
```json
// CONSUMED_BY example (Reference to Data Consumption Location)
{{
"type": "CONSUMED_BY",
"source": "input_001",
"target": "agent_001",
"interaction_prompt": "", // ALWAYS EMPTY - DO NOT FILL
"interaction_prompt_ref": [
{{"line_start": 45, "line_end": 45}}
]
}}
```
```json
// USES example (Reference to Tool Usage Location)
{{
"type": "USES",
"source": "agent_001",
"target": "tool_001",
"interaction_prompt": "", // ALWAYS EMPTY - DO NOT FILL
"interaction_prompt_ref": [
{{"line_start": 89, "line_end": 91}}
]
}}
```
```json
// PERFORMS example (Reference to Task Execution Location)
{{
"type": "PERFORMS",
"source": "agent_001",
"target": "task_001",
"interaction_prompt": "", // ALWAYS EMPTY - DO NOT FILL
"interaction_prompt_ref": [
{{"line_start": 67, "line_end": 67}}
]
}}
```
```json
// DELIVERS_TO example (Reference to Output Delivery Location)
{{
"type": "DELIVERS_TO",
"source": "output_001",
"target": "human_001",
"interaction_prompt": "", // ALWAYS EMPTY - DO NOT FILL
"interaction_prompt_ref": [
{{"line_start": 123, "line_end": 123}}
]
}}
```
```json
// INTERVENES example (Reference to Human Intervention Location)
{{
"type": "INTERVENES",
"source": "human_001",
"target": "task_001",
"interaction_prompt": "", // ALWAYS EMPTY - DO NOT FILL
"interaction_prompt_ref": [
{{"line_start": 156, "line_end": 156}}
]
}}
```
- 'PRODUCES' relationships must only originate from 'Task' entities. Do NOT create 'PRODUCES' relationships from 'Agent' or 'Tool' entities. If such a relationship is detected, reassign it to the appropriate Task or remove it.
- 'CONSUMED_BY' relationships must only go from 'Input' to 'Agent'. Do NOT create reverse relationships.
- 'DELIVERS_TO' relationships must only go from 'Output' to 'Human'.
FINAL VALIDATION CHECKLIST:
Before submitting relationships, verify:
1. Every source field contains an exact entity.id from the entities list (format: TYPE_NUMBER)
2. Every target field contains an exact entity.id from the entities list (format: TYPE_NUMBER)
3. No source or target field contains entity names, descriptions, emails, or actual content
4. All relationship types are from the approved list of 10 types
5. Source/target entity types match the constraints for each relationship type
6. SPECIFIC CHECK: No email addresses (like "skandha.tandra@unilever.com") in source/target fields
7. SPECIFIC CHECK: All human references use "human_001", "human_002", etc., not actual names or emails
8. CRITICAL CHECK: For ALL relationships, interaction_prompt MUST be empty string "" - only populate interaction_prompt_ref with location references
9. CRITICAL CHECK: interaction_prompt_ref should point to ACTUAL RUNTIME MESSAGES/LOGS locations, not static prompt definitions or specifications
IMPORTANCE ASSESSMENT REQUIREMENTS:
For each relationship, you MUST assign an importance level based on its role in the system:
HIGH IMPORTANCE:
- Critical data flows that are essential for system operation
- Core agent-task assignments that drive main functionality
- Essential tool usage that multiple workflows depend on
- Primary input consumption that initiates key processes
- Final output delivery to key stakeholders
- Critical intervention relationships that prevent failures
MEDIUM IMPORTANCE:
- Standard operational workflows and data processing
- Common agent-task interactions in normal operation
- Regular tool usage that supports functionality
- Secondary input processing that provides context
- Intermediate output generation for downstream processes
- Routine human interactions and feedback loops
LOW IMPORTANCE:
- Auxiliary connections with minimal system impact
- Optional workflow steps that can be skipped
- Rarely used tool interactions or utilities
- Diagnostic or logging data flows
- Backup or redundant relationships
- Occasional human oversight or monitoring
# ASSESSMENT GUIDELINES:
# - Consider the relationship's criticality to system success
# - Evaluate how often this interaction occurs
# - Assess the impact if this relationship failed
# - Look at whether this connection is replaceable
# - Consider the consequences of removing this relationship
"""
    # Persona metadata used to assemble the system prompt for this stage.
    relationship_analyzer_agent_info = {
        "role": "Relationship Analyzer",
        "goal": "Discover standard relationships between entities using exact entity IDs and predefined relationship types",
        "backstory": """You are an expert in understanding relationships and connections between entities.
You can identify when agents delegate tasks, use tools, ask questions of each other, or work
together on tasks from various data sources including logs, documentation, model cards, or natural language descriptions.
You strictly adhere to using only the ten predefined relationship types (CONSUMED_BY, PERFORMS, ASSIGNED_TO, USES,
REQUIRED_BY, SUBTASK_OF, NEXT, PRODUCES, DELIVERS_TO, INTERVENES) and never create custom relationship types. You maintain the correct source and target entity types
for each relationship as defined in the system.
CRITICAL SKILL: You are meticulous about using exact entity.id values (not names) in relationship source and target fields.
You understand that using entity names instead of IDs will break the knowledge graph visualization and cause system errors.
You always double-check that every source and target ID corresponds to an actual entity from the extracted entities list.
You clearly distinguish between:
- PERFORMS (Agent→Task): When an agent actually executes/carries out a task
- ASSIGNED_TO (Task→Agent): When a task is delegated/assigned to an agent as a responsibility
For relationships requiring prompts, you extract the appropriate prompt-based content. For relationships not requiring prompts,
you leave the interaction_prompt field empty.
You see patterns in interactions that others might miss, making you
essential for mapping the complex web of relationships in multi-agent systems,
regardless of how the system information is presented.""",
        "task_expected_output": "A list of properly structured relationships with exact entity references",
    }
    system_prompt = create_system_prompt(
        role=relationship_analyzer_agent_info["role"],
        backstory=relationship_analyzer_agent_info["backstory"],
        goal=relationship_analyzer_agent_info["goal"]
    )
    # Unlike the first stage, previous_step_output is forwarded here so the
    # model can resolve the exact entity ids it must use in source/target.
    task_description = create_task_prompt(
        task_description=RELATION_EXTRACTION_INSTRUCTION_PROMPT,
        task_expected_output=relationship_analyzer_agent_info["task_expected_output"],
        response_format=RelationshipExtractionList,
        previous_step_output=previous_step_output,
    )
    # Model name is read from the environment; raises KeyError if
    # OPENAI_MODEL_NAME is not set.
    return await run_agent(
        model_name=os.environ["OPENAI_MODEL_NAME"],
        system_prompt=system_prompt,
        task_description=task_description,
        response_format=RelationshipExtractionList,
    )
async def knowledge_graph_builder(input_data, previous_step_output, context_documents="") -> KnowledgeGraph:
    """Final pipeline stage: assemble the unified knowledge graph.

    Runs the graph-builder agent up to three times; each retry feeds the
    previously produced (invalid) graph back to the model under
    ``incorrect_results`` so it can self-correct dangling relation references.
    If every attempt still contains relations pointing at unknown entity ids,
    those relations are dropped rather than returned broken.

    Args:
        input_data: The trace window being analyzed; interpolated into the prompt.
        previous_step_output: Dict with the accumulated "entities" and
            "relations" results from the earlier stages (spread into the
            payload sent to the model).
        context_documents: Optional supplementary domain documents (string).

    Returns:
        KnowledgeGraph: a graph whose relations all reference existing entity ids.
    """
    GRAPH_BUILDER_INSTRUCTION_PROMPT = f"""
**CONTEXT DOCUMENTS AVAILABLE:**
The following context documents are available to enhance your understanding:
- {context_documents}
**PRIMARY INPUT DATA:**
Here is the input window you are analysing (with <L#> line numbers):
- {input_data}
**CONTEXT-ENHANCED KNOWLEDGE GRAPH CONSTRUCTION:**
Use the provided context documents to:
1. Create more accurate system names and summaries based on domain knowledge
2. Apply domain-specific importance assessments
3. Follow any provided guidelines for knowledge graph structure
4. Reference examples for system categorization and analysis
5. Incorporate business domain understanding into failure detection
Construct a unified knowledge graph from analyzed entities, relationships, **and detected failures**.
FAILURE LIST REQUIREMENT (YOU must perform this detection):
- Add a top-level field called `failures` (array) to the final JSON.
- Each item must match the `Failure` schema (id, risk_type, description, raw_text, raw_text_ref, affected_id).
- Use the following predefined risk_type values only: AGENT_ERROR, PLANNING_ERROR, EXECUTION_ERROR, RETRIEVAL_ERROR, HALLUCINATION.
- For every distinct mistake or risk you identify in this window, create exactly one Failure object with **all** occurrences referenced via `raw_text_ref`.
- Leave `raw_text` empty "" and rely on `raw_text_ref` for extraction (same convention as prompts).
- `affected_id` should point to the entity or relation most responsible, if applicable; otherwise leave null.
**MANDATORY**: If this window shows *any* error, bug, or incorrect behaviour you **MUST** add at least one Failure object. Unit-tests will fail if the `failures` array is missing or empty.
IF ANY SUCH KEYWORD APPEARS AND THERE IS NO FAILURE OBJECT, THE OUTPUT WILL BE REJECTED.
QUICK CHECKLIST BEFORE YOU SUBMIT:
1. `failures` array exists in top-level JSON.
2. Each Failure has at least one `raw_text_ref` entry.
3. Failure IDs follow sequential `failure_001`, `failure_002`, … order.
4. The first entry in `raw_text_ref` (index 0) must occur **on or before** the dataset's `mistake_step` line.
- The **primary evidence** for a Failure must be the **exact agent message** at the first mistake step—the line where the incorrect answer or erroneous action first appears. Do NOT rely solely on later diagnostic logs.
- Typical evidence keywords include: "ERROR", "Incorrect answer", "Traceback", "I cannot", "Failed to". Capture that specific message line via `raw_text_ref`.
CRITICAL FIRST-SYMPTOM LINE RULE
• The *very first* line that shows the mistake MUST be captured via `raw_text_ref`.
• "First line" means the earliest agent or tool message whose content already demonstrates the error.
• Typical trigger words to scan for: "error", "incorrect", "failed", "traceback", "cannot", "exception", "invalid".
• Mini-example (multi-line traceback):
assistant: Traceback (most recent call last)
assistant: File "...", line 12, in <module>
assistant: ValueError: division by zero ← only this FIRST offending line is referenced
Correct `raw_text_ref` → `[{{"line_start": 2, "line_end": 2}}]`
Example Failure object:
```json
{{
"id": "failure_001",
"risk_type": "AGENT_ERROR",
"description": "Agent provided incorrect SQL syntax causing downstream failure",
"raw_text": "",
"raw_text_ref": [{{"line_start": 42, "line_end": 43}}],
"affected_id": "agent_001"
}}
```
Core requirements:
1. Integrate entities and relationships into a coherent structure
2. Maintain consistent entity references
3. Use ONLY the ten predefined relation types
4. Preserve all prompt content and importance assessments
5. Include metadata with timestamp and statistics
6. Create a descriptive system name (3-7 words)
7. Write a concise 2-3 sentence system summary
8. Include comprehensive system assessment
System naming guidelines:
- Reflect primary purpose and function
- Include key agent roles
- Mention domain/industry if applicable
- Highlight distinctive capabilities
Example names: "Financial Research Collaboration Network", "Customer Support Ticket Triage System"
System summary must explain:
- What the system does (purpose/function)
- How it works (agent coordination pattern)
- Value provided (problem solved)
Example summary: "This system analyzes customer support tickets using a classifier agent and specialist agents to route issues to appropriate departments. It manages workflow and handoffs between specialists. The system reduces response time by matching issues with qualified representatives."
Validation requirements:
1. Include ONLY these relationship types:
- CONSUMED_BY: Input→Agent
- PERFORMS: Agent→Task
- ASSIGNED_TO: Task→Agent
- USES: Agent→Tool
- REQUIRED_BY: Tool→Task
- SUBTASK_OF: Task→Task
- NEXT: Task→Task (sequence)
- PRODUCES: Task→Output
- DELIVERS_TO: Output→Human
- INTERVENES: Agent/Human→Task
2. Confirm task relationships accurately show:
- Sequential dependencies (NEXT)
- Hierarchical structure (SUBTASK_OF)
3. Verify entity IDs (not names) in all relationships
Connectivity validation:
- All entities must connect to at least one other entity
- All inputs must be consumed by agents via CONSUMED_BY
- All outputs must be produced by tasks via PRODUCES
- All outputs should be delivered to humans via DELIVERS_TO when applicable
- All components must be reachable (no isolated subgraphs)
- Clear paths must exist from inputs to outputs through agents and tasks
- All agents must have defined roles
- Document any added connections in metadata.connectivity_fixups
System Integration:
- Focus on comprehensive system analysis and assessment
- Include detailed metadata about system components and interactions
- Document system architecture patterns and design decisions
System assessment:
1. Evaluate overall system importance (HIGH/MEDIUM/LOW) based on:
- Component count and centrality
- Workflow centrality
- Uniqueness/replaceability
- Failure impact
- Single points of failure
- Usage frequency
2. Provide 3-5 sentence justification covering:
- Importance level rationale
- Key assessment factors
- Architecture strengths/vulnerabilities
- Risk mitigations
- Comparison to similar systems
Data flow analysis:
- Map input consumption paths
- Track output production and utilization
- Identify transformation points
- Document critical data paths
- Highlight bottlenecks and redundancies
Output a complete KnowledgeGraph object with entities, relations, metadata, system_name, and system_summary.
"""
    # Persona metadata used to assemble the system prompt for this stage.
    knowledge_graph_builder_agent_info = {
        "role": "Knowledge Graph Builder",
        "goal": "Build a complete, consistent knowledge graph using extracted entities and relationships with proper validation",
        "backstory": """You are skilled at organizing information into structured knowledge graphs.
You understand how to represent entities and relationships in a way that captures the essence
of a system. Your knowledge graphs are well-structured, consistent, and follow best practices
for knowledge representation.
You excel at analyzing complex systems holistically to provide overall risk assessments.
You can evaluate the criticality of entire systems based on their components, dependencies,
and role in broader workflows. Your system-level risk analyses help stakeholders understand
key vulnerabilities and critical components that warrant special attention.
You ensure the final output is in a format that can be easily used for further analysis or visualization.""",
        "task_expected_output": "A complete knowledge graph with entities, relationships, failures, and metadata",
    }
    system_prompt = create_system_prompt(
        role=knowledge_graph_builder_agent_info["role"],
        backstory=knowledge_graph_builder_agent_info["backstory"],
        goal=knowledge_graph_builder_agent_info["goal"],
    )

    def _dangling_relations(graph: KnowledgeGraph) -> list:
        """Return relations whose source or target id is not a known entity id.

        Uses a set of entity ids so membership tests are O(1) instead of the
        O(n) list scans the naive approach would incur per relation.
        """
        entity_ids = {e.id for e in graph.entities}
        return [
            rel
            for rel in graph.relations
            if rel.source not in entity_ids or rel.target not in entity_ids
        ]

    full_previous_step_output = {
        **previous_step_output,
        "incorrect_results": [],
    }
    kg = None
    for _attempt in range(3):
        if kg is not None:
            # Feed the previous invalid graph back so the model can see (and
            # avoid repeating) its dangling relation references.
            full_previous_step_output["incorrect_results"].append(kg.model_dump())
        task_description = create_task_prompt(
            task_description=GRAPH_BUILDER_INSTRUCTION_PROMPT,
            task_expected_output=knowledge_graph_builder_agent_info["task_expected_output"],
            response_format=KnowledgeGraph,
            previous_step_output=json.dumps(full_previous_step_output, indent=2),
        )
        kg = await run_agent(
            model_name=os.environ["OPENAI_MODEL_NAME"],
            system_prompt=system_prompt,
            task_description=task_description,
            response_format=KnowledgeGraph,
        )
        if not _dangling_relations(kg):
            return kg
    # All retries exhausted: drop the relations that reference unknown entity
    # ids so downstream graph consumers never see dangling edges.
    entity_ids = {e.id for e in kg.entities}
    valid_relations = [
        rel
        for rel in kg.relations
        if rel.source in entity_ids and rel.target in entity_ids
    ]
    return kg.model_copy(update={"relations": valid_relations})
async def run(input_data, context: "str | None" = None) -> KnowledgeGraph:
    """Run the three-stage extraction pipeline over one trace window.

    Stages: entity extraction -> relationship analysis -> knowledge graph
    construction, each stage receiving the serialized output of the previous one.

    Args:
        input_data: The trace window to analyze (passed verbatim to each stage).
        context: Optional supplementary context documents. May be None.

    Returns:
        KnowledgeGraph: the assembled graph for this window.
    """
    # Normalize None -> "" here: the stage functions interpolate this value
    # directly into their prompts, so a None would otherwise surface as the
    # literal text "- None" in the prompt sent to the model.
    context = context or ""
    entity_extractor_result = await entity_extractor(input_data, context)
    # The relationship stage expects the entity list as a JSON string so it can
    # quote exact entity ids in relationship source/target fields.
    previous_step_output = json.dumps(
        {
            "entities": entity_extractor_result.model_dump(),
        },
        indent=2,
    )
    relationship_analyzer_result = await relationship_analyzer(
        input_data, previous_step_output, context
    )
    # The graph-builder stage takes a dict (it spreads it into its own payload).
    previous_step_output = {
        "entities": entity_extractor_result.model_dump(),
        "relations": relationship_analyzer_result.model_dump(),
    }
    knowledge_graph_result = await knowledge_graph_builder(
        input_data, previous_step_output, context
    )
    return knowledge_graph_result
|