File size: 58,404 Bytes
c2ea5ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7bc750c
c2ea5ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
from typing import List
import os

from agentgraph.graph_generation.knowledge_graph_langsmith.knowledge_graph.component_types import (
    Entity,
    Relation,
    KnowledgeGraph,
)
from pydantic import BaseModel
from typing import List


# Pydantic model wrapping a list of `Entity` objects.
# It is the declared return type of `entity_extractor` in this file.
class EntityExtractionList(BaseModel):
    # Entity objects carried by this container; defaults to an empty list.
    entities: List[Entity] = []


# Pydantic model wrapping a list of `Relation` objects.
class RelationshipExtractionList(BaseModel):
    # Relation objects carried by this container; defaults to an empty list.
    relations: List[Relation] = []

from agentgraph.graph_generation.knowledge_graph_langsmith.knowledge_graph.agent_base_utils import (
    run_agent,
    create_system_prompt,
    create_task_prompt,
)
import json

async  def entity_extractor(input_data, context_documents=None) -> EntityExtractionList:
    # Define instruction prompts as strings (extracted from task descriptions)
    ENTITY_EXTRACTION_INSTRUCTION_PROMPT = f"""
Extract and categorize all entities from the provided agent system information using REFERENCE-BASED EXTRACTION as the primary method.

**CONTEXT DOCUMENTS AVAILABLE:**
The following context documents are available to enhance your understanding:
- {context_documents if context_documents else "None provided."}

**PRIMARY INPUT DATA:**
Here is the main trace you are analyzing:
- {input_data}

**CRITICAL: REFERENCE-ONLY EXTRACTION**
- You MUST leave the `raw_prompt` field as an empty string "" for ALL entities
- You MUST ONLY populate the `raw_prompt_ref` field with location references
- DO NOT extract or include the actual prompt content - only identify WHERE it is located
- The actual content will be extracted later by other functions using your references

**CONTEXT-ENHANCED EXTRACTION:**
Use the provided context documents to:
1. Better understand domain-specific terminology and concepts
2. Identify entities that might be domain-specific or technical
3. Recognize patterns and relationships specific to the business domain
4. Apply any provided schemas or guidelines for entity categorization
5. Reference examples to understand expected entity types and formats

**PROMPT DEFINITION**  
A *prompt* is the exact text that will be injected into an LLM and which establishes the behaviour or definition of an entity (system / instruction / specification) or of a relation (interaction excerpt, format specification, etc.).

**CRITICAL MULTI-OCCURRENCE REQUIREMENT (read carefully)**  
- The trace you receive is already numbered with `<L#>` markers.  
- For EVERY distinct prompt you MUST enumerate *all* **contiguous occurrences** of that prompt text in the numbered trace.  
- Represent each occurrence with exactly one `ContentReference` object whose `line_start` is the first `<L#>` line of the block and whose `line_end` is the last `<L#>` line of that same uninterrupted block (indented continuation-lines included).  
- The `raw_prompt_ref` list length **must therefore equal** the number of separate occurrences (not the number of lines). Missing even **one** occurrence will fail validation.  
- Overlap between the references of different entities is acceptable when prompts are truly shared.  
- Tool definitions that begin with `@tool` ARE ALSO PROMPTS. Treat them exactly like other prompts: leave `raw_prompt` blank and add one `ContentReference` per occurrence.  

Example (prompt appears twice across two blocks):  
```json
{{
"id": "agent_001",
"type": "Agent",
"name": "Time Tracker Agent",
"raw_prompt": "",  // ALWAYS EMPTY - DO NOT FILL
"raw_prompt_ref": [
    {{"line_start": 3, "line_end": 3}},
    {{"line_start": 9, "line_end": 9}}
]
}}
```

Tool-definition example (single occurrence with verification):
```json
{{
"id": "tool_001",
"type": "Tool",
"name": "zip_compress",
"raw_prompt": "",  // ALWAYS EMPTY - DO NOT FILL
"raw_prompt_ref": [
    {{"line_start": 15, "line_end": 15}}
]
}}
```
Verification process used:
- Located anchor text "@tool" and "zip_compress" in the input
- Counted from <L1> to find the exact <L15> marker
- Verified <L15> contains the complete tool definition
- **CRITICAL: raw_prompt left empty as required**

CORE PRINCIPLE: Each entity is defined by its DISTINCT PROMPT LOCATION, not by extracting the actual content.
This approach ensures:
- More robust and stable knowledge graphs across multiple traces
- Better entity distinction and relationship mapping
- Separation of reference identification from content extraction
- Reduced risk of content hallucination in entity extraction

Focus on identifying distinct prompt locations that define each entity type, as prompt references are the most reliable distinguishing factor for stable knowledge graphs.

CRITICAL ID FORMAT REQUIREMENT: Generate entity IDs using ONLY the format TYPE_SEQUENTIAL_NUMBER starting from 001.
Examples: "agent_001", "task_001", "human_001", "tool_001", "input_001", "output_001"
NEVER use names, emails, descriptions, or content as entity IDs.

INDENTATION RULE FOR CHUNKED LINES:
- When a single line from the original input is too long, it will be chunked into multiple lines.
- The first chunk will appear at the normal indentation level.
- All subsequent chunks of that same original line will be INDENTED with two spaces.
- This indentation is a visual cue that the indented lines are continuations of the preceding non-indented line.

LINE COUNTING METHODOLOGY (CRITICAL FOR ACCURACY):
Follow this systematic approach to avoid counting errors:

STEP 1 - CONTENT IDENTIFICATION:
- First, identify the exact content you need to reference
- Note distinctive words or phrases that will serve as anchors
- Determine if the content spans single or multiple lines

STEP 2 - ANCHOR-BASED POSITIONING:
- Find a unique phrase or pattern near the target content
- Search for that anchor text in the numbered input
- Use the anchor to locate the general area, then count precisely

STEP 3 - SYSTEMATIC LINE COUNTING:
- Count <L#> markers sequentially from a known reference point
- Do NOT skip or approximate - count every single <L#> marker
- Pay attention to indented continuation lines (they have their own <L#>)
- For long content, count in chunks and verify totals

STEP 4 - VERIFICATION:
- Double-check by counting backwards from a different reference point
- Verify the line_start contains the beginning of your target content
- Verify the line_end contains the end of your target content
- Ensure line_end >= line_start

COMMON COUNTING ERRORS TO AVOID:
- Skipping indented continuation lines that have <L#> markers
- Miscounting when jumping between distant parts of the input
- Confusing similar content in different locations
- Using approximate positions instead of exact <L#> marker counts

CONTENT REFERENCE INSTRUCTIONS:
- For each distinct prompt (regardless of length), you must find **ALL** occurrences in the input trace.
- The `raw_prompt_ref` field for an entity must be a **LIST** of `ContentReference` objects, one for each location where that prompt appears.
- Each `ContentReference` object should contain the `line_start` and `line_end` for that specific occurrence.
- **CRITICAL: You MUST NOT omit any occurrence. Be COMPREHENSIVE, not conservative. It's better to include more references than to miss any.**
- **For function-based tools: Include EVERY line where the function name appears (imports, calls, error messages, etc.)**
- **For agents: Include EVERY message or mention of the agent name**
- **For tasks: Include EVERY reference to the task or its components**

ACCURACY VERIFICATION CHECKLIST (complete before submitting):
✓ I have identified unique anchor text near each content location
✓ I have counted <L#> markers systematically, not approximately
✓ I have verified line_start contains the actual content beginning
✓ I have verified line_end contains the actual content ending
✓ I have double-checked my counting using a different reference point

Example (duplicate system prompt with verification):
```json
{{
"id": "agent_001",
"type": "Agent",
"name": "Time Tracker Agent",
"raw_prompt": "",  // left blank per guidelines
"raw_prompt_ref": [
    {{"line_start": 3,  "line_end": 5}},
    {{"line_start": 20, "line_end": 22}}
]
}}
```
In this example:
- The same system prompt occurs twice and both locations are captured
- Anchor text "Time Tracker Agent" was used to locate both occurrences
- Line counting was verified by counting from <L1> to each location
- Each occurrence was double-checked by counting backwards from <L25>

PROMPT-BASED ENTITY EXTRACTION RULES:

1. Agents (System Prompt Entities)
- Each DISTINCT system prompt defines a separate Agent entity
- Extract complete system prompts that define agent roles, capabilities, and behaviors
- raw_prompt MUST be an empty string "" (leave blank). Provide the actual system prompt via one or more `raw_prompt_ref` entries.
- Name should reflect the agent's role as defined in the system prompt
- Multiple agents with identical system prompts = single entity

2. Tasks (Instruction Prompt Entities)
- Each DISTINCT instruction prompt defines a separate Task entity
- Extract complete instruction prompts that define task objectives and requirements
- raw_prompt MUST be an empty string "" (leave blank). Provide the full instruction prompt via `raw_prompt_ref`.
- Name should reflect the task objective as defined in the instruction prompt
- Multiple tasks with identical instruction prompts = single entity

3. Tools (Description Prompt Entities)
- Each DISTINCT tool description/specification defines a separate Tool entity
- Extract complete tool descriptions including function signatures, parameters, and purpose
- raw_prompt MUST be an empty string "" (leave blank). Provide the full tool description/specification via `raw_prompt_ref`.
- Name should reflect the tool's function as defined in the description prompt

4. Inputs (Input Format Prompt Entities)
- Each DISTINCT input data format specification defines a separate Input entity
- Extract format specifications, schema definitions, or data structure descriptions
- raw_prompt MUST be an empty string "" (leave blank). Provide the full input format specification via `raw_prompt_ref`.
- Name should reflect the input data type as defined in the format specification
- Focus on data format prompts, not individual data values
- Examples: Database schema definitions, API request formats, file structure specifications

5. Outputs (Output Format Prompt Entities)
- Each DISTINCT output format specification defines a separate Output entity
- Extract format specifications for generated results, reports, or responses
- raw_prompt MUST be an empty string "" (leave blank). Provide the full output format specification via `raw_prompt_ref`.
- Name should reflect the output type as defined in the format specification
- Focus on output format prompts, not individual output values
- Examples: Report templates, response formats, file output specifications

6. Humans (Optional Prompt Entities)
- Each DISTINCT human interaction pattern defines a separate Human entity
- Extract interaction prompts that define human roles, feedback patterns, or intervention methods
- raw_prompt MUST be an empty string "" (leave blank). Provide the full interaction specification via `raw_prompt_ref`.
- Name should reflect the human role as defined in the interaction prompt (e.g., "Business Analyst", "Data Scientist")
- ID must follow format: "human_001", "human_002", etc. (NEVER use email addresses or actual names as IDs)
- Only create if there are explicit human interaction prompts or feedback specifications
- IMPORTANT: If you find email addresses like "skandha.tandra@unilever.com", put them in the name field, but use "human_001" as the ID

PROMPT-BASED ASSIGNMENT REQUIREMENTS:
- Assign unique IDs to all entities based on PROMPT UNIQUENESS, not names or descriptions
- Entities with IDENTICAL prompts = SINGLE entity (even if names differ)
- Entities with DIFFERENT prompts = SEPARATE entities (even if names are similar)
- Use only these entity types: "Agent", "Task", "Tool", "Input", "Output", "Human"
- Focus on extracting COMPLETE prompt REFERENCES that define each entity's behavior/specification
- Names should be derived from prompt content understanding, not abstract classifications
- **CRITICAL: The raw_prompt field MUST ALWAYS BE EMPTY - only raw_prompt_ref should be populated**

ENTITY ID GENERATION RULES (MANDATORY FORMAT):
- Use ONLY this format: TYPE_SEQUENTIAL_NUMBER (e.g., "agent_001", "task_001", "tool_001")
- Sequential numbering starts from 001 for each entity type
- NEVER use actual names, emails, or content as IDs
- Examples of CORRECT IDs:
* Agent entities: "agent_001", "agent_002", "agent_003"
* Task entities: "task_001", "task_002", "task_003" 
* Tool entities: "tool_001", "tool_002", "tool_003"
* Input entities: "input_001", "input_002", "input_003"
* Output entities: "output_001", "output_002", "output_003"
* Human entities: "human_001", "human_002", "human_003"
- Examples of INCORRECT IDs:
* "skandha.tandra@unilever.com" (email address)
* "SQL Query Generator" (entity name)
* "Generate Spend Analysis Task" (entity description)
- CRITICAL: The relationship analyzer will use these exact ID values to create connections

**REFERENCE-ONLY EXTRACTION REQUIREMENTS:**
- **raw_prompt field**: MUST be empty string "" for ALL entities
- **raw_prompt_ref field**: MUST contain location references to where the prompt content appears
- **DO NOT extract actual content**: Your job is to identify locations, not extract text
- **Content will be extracted later**: Other functions will use your references to get actual content

Raw Prompt Reference Extraction (Identify locations of actual runtime prompts from agent system traces):
Identify the LOCATIONS of ACTUAL prompts, instructions, and configurations that were used during system execution.
Focus on finding the real runtime context locations, not generic descriptions.

AGENT ENTITIES - Extract complete agent definitions:
Look for agent framework patterns (CrewAI, LangChain, AutoGen, etc.) and extract:
- Complete role definitions: "role='Entity Extractor'" or "You are an Entity Extractor"
- Goal statements: "goal='Identify and categorize entities'" 
- Backstory/context: Full backstory or system context provided to the agent
- System prompts: Any "system:" messages or agent initialization prompts
- Agent configurations: Model settings, temperature, max_tokens if present

CONVERSATIONAL AGENT DETECTION (CRITICAL FOR MULTI-AGENT TRACES):
In addition to explicit system prompts, also identify agents from conversational patterns:

1. AGENT NAME PATTERNS:
- Look for consistent agent names that appear as message senders (e.g., "ProblemSolving_Expert", "Verification_Expert")
- Agent names often contain role indicators: "_Expert", "_Agent", "_Assistant", "_Bot", "_terminal"
- Names with specialized domains: "ArithmeticProgressions_Expert", "Computer_terminal", "SQL_Agent"

2. CONVERSATIONAL AGENT INDICATORS:
- Messages from the same named entity across multiple interactions
- Specialized responses showing domain expertise (e.g., mathematical calculations, code execution, verification)
- Agent-to-agent communication patterns (addressing other agents by name)
- Consistent role behavior (e.g., always providing verification, always executing code)

3. AGENT IDENTIFICATION STRATEGY:
- Create ONE Agent entity per UNIQUE agent name that appears in conversations
- Use the agent's first substantial message as the raw_prompt_ref (their introduction or first meaningful contribution)
- If no explicit system prompt exists, use their first message that demonstrates their role/capabilities
- Name the entity based on their apparent role and domain expertise

4. EXAMPLES OF CONVERSATIONAL AGENTS:
- "ProblemSolving_Expert" → Agent entity for problem-solving expertise
- "Verification_Expert" → Agent entity for verification and validation
- "Computer_terminal" → Agent entity for code execution and system interaction
- "ArithmeticProgressions_Expert" → Agent entity for mathematical calculations
- "SQL_Agent" → Agent entity for database operations

5. AGENT ENTITY CREATION RULES FOR CONVERSATIONS:
- Each unique agent name = separate Agent entity
- **COMPREHENSIVE CONTENT REFERENCES: Include ALL messages from this agent, not just the first one**
- Include their introduction message, substantial contributions, and even status updates
- Be exhaustive: every line where the agent name appears or where they send a message
- Name should reflect their role: "ProblemSolving_Expert system prompt" → "Problem Solving Expert"
- Description should summarize their demonstrated capabilities in the conversation

TASK ENTITIES - Extract specific task instructions:
Look for actual task definitions and instructions:
- Task descriptions: Complete task objectives and requirements
- Input parameters: Specific data, queries, or context provided to the task
- Expected outputs: Defined output formats or requirements
- Task constraints: Limitations, rules, or guidelines
- Execution context: Timing, dependencies, or environmental factors

TOOL ENTITIES - CRITICAL: Extract ALL tools, especially function-based tools:

**MANDATORY DETECTION PATTERNS:**
1. Function imports: "from functions import perform_web_search" → Extract "perform_web_search" as Tool
2. Function calls: "perform_web_search(query, count=20)" → Extract "perform_web_search" as Tool
3. Function usage: "results = perform_web_search(...)" → Extract "perform_web_search" as Tool
4. Error mentions: "perform_web_search returned None" → Extract "perform_web_search" as Tool

**EXTRACTION REQUIREMENTS:**
- If you see "perform_web_search" ANYWHERE in the trace, you MUST extract it as a Tool entity
- If you see "from functions import [function_name]", extract [function_name] as Tool
- If you see "[function_name](" pattern, extract [function_name] as Tool
- Count usage frequency across all agents
- Determine importance based on usage frequency and failure impact

**COMPREHENSIVE CONTENT REFERENCE REQUIREMENTS FOR TOOLS:**
- Include EVERY line where the tool name appears (be exhaustive, not selective)
- Include import statements: "from functions import perform_web_search"
- Include function calls: "perform_web_search(query, count=20)"
- Include variable assignments: "results = perform_web_search(...)"
- Include error messages: "perform_web_search returned None"
- Include conditional statements: "if perform_web_search(query) is None"
- Include comments or documentation mentioning the tool
- Include any line containing the exact tool name, regardless of context

**TOOL ENTITY FIELDS:**
- name: The exact function name (e.g., "perform_web_search")
- description: Purpose inferred from usage context and parameters
- importance: HIGH if used by multiple agents or causes failures, MEDIUM if used frequently, LOW if used rarely

**DECORATOR-BASED TOOLS (@tool):**
- Tool signatures: Function names, parameters, return types
- Tool descriptions: Purpose and functionality explanations
- Usage examples: How the tool is called with specific parameters
- Tool configurations: Settings, API keys, endpoints (sanitized)
- Error handling: Retry logic, fallback mechanisms

HUMAN ENTITIES - Extract user interactions and feedback:
Capture complete human interactions:
- Original user queries: Full questions or requests
- Feedback statements: Corrections, approvals, or rejections
- Intervention commands: Direct instructions or overrides
- Context provided: Background information or clarifications
- Interaction timing: When feedback was provided

INPUT/OUTPUT ENTITIES - Extract data specifications:
For data entities, capture:
- Data schemas: Column names, types, constraints
- Query specifications: SQL queries, filters, conditions
- File formats: JSON structures, CSV headers, data types
- Business rules: Logic, calculations, or transformations
- Data sources: Database names, table names, API endpoints

EXTRACTION PATTERNS TO LOOK FOR:
1. Agent Framework Patterns:
- CrewAI: "Agent(role=..., goal=..., backstory=...)"
- LangChain: "SystemMessage(content=...)"
- AutoGen: "ConversableAgent(name=..., system_message=...)"

1b. Conversational Agent Patterns:
- Named message senders: "ProblemSolving_Expert (assistant): [message content]"
- Agent role indicators: "Verification_Expert", "Computer_terminal", "ArithmeticProgressions_Expert"
- Multi-agent conversations: agents addressing each other by name
- Specialized responses: mathematical calculations, code execution, domain expertise
- Agent introductions: "You are given: (1) a task..." or "To solve the task..."

2. Task Patterns:
- "Task(description=..., expected_output=...)"
- "Please [action] with [parameters]"
- "Your task is to [objective]"

3. Tool Patterns:
- "@tool" decorators with function definitions
- "Action: [tool_name]" with "Action Input: [parameters]"
- API calls with endpoints and parameters
- Function imports: "from [module] import [function_name]"
- Function calls: "[function_name]([parameters])" with multiple usage instances
- Module function calls: "[module].[function_name]([parameters])"
- Utility functions used across multiple agents or contexts

4. Human Interaction Patterns:
- Direct user messages or queries
- Feedback like "That's not correct, try again"
- Approvals like "Yes, proceed with this approach"

FORMATTING REQUIREMENTS:
- Preserve original formatting, indentation, and structure when possible
- Use triple quotes for multi-line prompts
- Include parameter names and types for tools
- Maintain JSON/YAML structure for configurations
- Sanitize sensitive information (API keys, passwords) but keep structure

Examples (showing actual runtime extraction):
```
# Agent prompt example (CrewAI)
Agent(
    role='SQL Query Generator',
    goal='Generate accurate Databricks SQL queries based on business requirements',
    backstory='You are an expert SQL developer specializing in Databricks SQL Warehouse. You understand complex business logic and can translate natural language requirements into efficient SQL queries.',
    llm='gpt-5-mini'
)
```

```
# Task prompt example
Task(
    description='Generate a SQL query to compare spend and supplier count for fatty alcohol purchases between 2023 and 2024. Include filters for plant exclusions and intercompany indicators.',
    expected_output='A complete SQL query with proper joins, filters, and aggregations that can be executed in Databricks SQL Warehouse'
)
```

```
# Tool prompt example (@tool decorator)
@tool
def databricks_sql_executor(query: str, warehouse_id: str) -> dict:
    \"\"\"Execute SQL queries in Databricks SQL Warehouse
    Args:
        query: SQL query string to execute
        warehouse_id: Databricks warehouse identifier
    Returns:
        Dictionary with query results and metadata
    \"\"\"
```

```
# COMPREHENSIVE TOOL EXTRACTION EXAMPLE
# ALL these lines should be included in raw_prompt_ref for "perform_web_search":

# Line 45: from functions import perform_web_search
# Line 67: results = perform_web_search(query="machine learning trends", count=20)
# Line 89: search_results = perform_web_search(query="AI applications", count=15)
# Line 102: if perform_web_search(query) is None:
# Line 156: logger.error("perform_web_search returned None")
# Line 203: # Using perform_web_search for data retrieval
# Line 234: except Exception as e: # perform_web_search failed

# RESULT: Extract ALL 7 occurrences as ContentReference objects
{{
"id": "tool_001",
"type": "Tool", 
"name": "perform_web_search",
"raw_prompt_ref": [
    {{"line_start": 45, "line_end": 45}},   # import statement
    {{"line_start": 67, "line_end": 67}},   # first function call
    {{"line_start": 89, "line_end": 89}},   # second function call
    {{"line_start": 102, "line_end": 102}}, # conditional check
    {{"line_start": 156, "line_end": 156}}, # error message
    {{"line_start": 203, "line_end": 203}}, # comment mention
    {{"line_start": 234, "line_end": 234}}  # exception comment
]
}}
```

```
# Human prompt example
Can you compare the spend and SupplierName count on PurchaseCommodityName fatty alcohol for 2023 and 2024 and share insights? I need this for the quarterly business review.
```

IMPORTANCE ASSESSMENT REQUIREMENTS:
For each entity, you MUST assign an importance level based on its role in the system:

HIGH IMPORTANCE:
- Core agents that coordinate or manage other agents
- Critical tasks that are essential for system function or user goals
- Essential tools that multiple agents depend on (e.g., perform_web_search used by multiple agents)
- Function-based tools with frequent usage across the workflow
- Primary inputs that drive the entire workflow
- Final outputs that represent the main system deliverables
- Key human stakeholders who make critical decisions

MEDIUM IMPORTANCE:
- Supporting agents with specialized but non-critical functions
- Standard operational tasks that support the main workflow
- Commonly used tools that enhance functionality (e.g., utility functions used occasionally)
- Function-based tools with moderate usage frequency
- Secondary inputs that provide additional context
- Intermediate outputs that feed into other processes
- Regular human users who provide routine input

LOW IMPORTANCE:
- Auxiliary agents with very specific or rare functions
- Simple tasks with minimal impact on overall system success
- Rarely used tools or utilities (e.g., debugging functions used once)
- Function-based tools with single or infrequent usage
- Optional inputs that provide minor enhancements
- Diagnostic or logging outputs
- Occasional human observers or reviewers

ASSESSMENT GUIDELINES:
- Consider the entity's centrality in the workflow
- Evaluate how many other entities depend on this one
- Assess the impact if this entity failed or was removed
- Look at frequency and criticality of usage patterns
- Consider whether the entity is replaceable or unique
- For function-based tools: Count usage frequency and cross-agent dependencies
        """

    entity_extractor_agent_info = {
        "role": "Entity Extractor",
        "goal": "Extract all entities with proper types, importance levels, and raw prompts from agent trace data",
        "backstory": """You specialize in identifying entities within various data sources. You can recognize agent names,
tools, tasks, and other important elements in logs, documentation, model cards, or natural language descriptions. 

You're particularly skilled at extracting model information, parameters, and performance metrics when available. 
You create concise, informative one-sentence descriptions for every entity you identify, capturing its core purpose
or function in a way that helps others understand its role in the system.

Your expertise helps create comprehensive knowledge graphs by ensuring all relevant entities are properly identified,
categorized, and described. You focus on detail and ensure nothing important is missed, regardless of the format of the input data.""",
        "task_expected_output": "A list of properly structured entities with types, importance levels, and prompts",
    }

    system_prompt = create_system_prompt(
        role=entity_extractor_agent_info["role"],
        backstory=entity_extractor_agent_info["backstory"],
        goal=entity_extractor_agent_info["goal"],
    )
    task_description = create_task_prompt(
        task_description=ENTITY_EXTRACTION_INSTRUCTION_PROMPT,
        task_expected_output=entity_extractor_agent_info["task_expected_output"],
        response_format=EntityExtractionList,
        previous_step_output=None,
    )

    return await run_agent(
        model_name=os.environ["OPENAI_MODEL_NAME"],
        system_prompt=system_prompt,
        task_description=task_description,
        response_format=EntityExtractionList,
    )


async def relationship_analyzer(input_data, previous_step_output, context_documents="") -> RelationshipExtractionList:
    """Ask the relationship-analysis agent to link previously extracted entities.

    The instruction prompt embeds ``context_documents`` and ``input_data``
    verbatim; ``previous_step_output`` is handed to ``create_task_prompt``
    untouched. The model name is read from the ``OPENAI_MODEL_NAME``
    environment variable, so a missing variable raises ``KeyError``.

    Returns whatever ``run_agent`` yields for the ``RelationshipExtractionList``
    response format.
    """
    # NOTE(review): the prompt below tells the model both to "Populate
    # interaction_prompt" and to always leave interaction_prompt empty --
    # confirm which instruction is intended.
    # Literal braces are doubled ({{ }}) because this is an f-string.
    instruction_prompt = f"""Map all relationships between system entities (identified in the previous step from this window) using ONLY the predefined relationship types.
Your analysis should focus on interactions described *within this specific window* of a larger chronological trace.

**CONTEXT DOCUMENTS AVAILABLE:**
The following context documents are available to enhance your understanding:
- {context_documents}

**PRIMARY INPUT DATA:**
Here is the main trace you are analyzing:
- {input_data}

MANDATORY: You MUST reference the exact entity list from the previous step with their IDs.
Every entity will have an ID in the format: TYPE_NUMBER (e.g., "agent_001", "human_001", "task_001")
You can ONLY use these exact IDs in your relationship source and target fields.

**CONTEXT-ENHANCED RELATIONSHIP ANALYSIS:**
Use the provided context documents to:
1. Better understand domain-specific workflows and processes
2. Identify standard relationship patterns in the business domain
3. Apply any provided guidelines for relationship categorization
4. Reference examples to understand expected relationship types
5. Recognize technical dependencies and data flows specific to the domain

Identify these 10 relationship types:
1. CONSUMED_BY: Input is processed by Agent
2. PERFORMS: Agent executes Task (focus on actual execution)
3. ASSIGNED_TO: Task delegated to Agent (focus on responsibility)
4. USES: Agent utilizes Tool
5. REQUIRED_BY: Tool is needed by Task
6. SUBTASK_OF: Task is component of parent Task
7. NEXT: Task follows another Task sequentially
8. PRODUCES: Task generates Output
9. DELIVERS_TO: Output is delivered to Human
10. INTERVENES: Agent/Human corrects Task

Critical distinctions:
- CONSUMED_BY: Input→Agent = data processing
- PERFORMS: Agent→Task = actual execution
- ASSIGNED_TO: Task→Agent = responsibility assignment
- DELIVERS_TO: Output→Human = final delivery
- INTERVENES: Agent/Human→Task = active correction/override

RELATIONSHIP EXTRACTION GUIDELINES:
When identifying relationships, be careful to ONLY map connections between actual entities:

1. DO NOT create these relationships:
- Between framework containers (e.g., "Crew", "Pipeline") and other entities
- Using execution IDs or session identifiers as entities
- Between status indicators and actual entities
- Between log formatting elements and actual entities

2. DO create relationships between:
- Actual named agents (e.g., "Organizer", "Thinker") and their tasks
- Agents and the specific tools they use
- Tasks and the tools they require
- Tasks that have sequential or hierarchical dependencies
- Entities and the actual inputs/outputs they consume/produce
- Human participants and the entities they review/modify

3. For agent frameworks:
- The framework container (e.g., "Crew", "Pipeline") is NOT an entity and should NOT have relationships
- Task IDs should be replaced with actual task names/descriptions in relationships
- Focus on the meaningful operational relationships, not the framework structure

EXAMPLE:
In a log entry like:
"🚀 Crew: crew
└── 📋 Task: abc-123 (Generate creative text)
    Status: Executing Task...
    └── 🤖 Agent: Researcher
        Status: In Progress"

CORRECT relationship (if "Generate creative text" is an identified Task entity and "Researcher" an Agent entity):
- "Researcher PERFORMS Generate creative text"

INCORRECT relationships:
- "crew PERFORMS abc-123" (framework container to task ID, unless 'crew' is a defined entity and interacts)
- "Researcher PERFORMS abc-123" (using task ID instead of description from entity list)

For each relationship:
- CRITICAL: Use the exact entity.id field values (NOT entity.name) for source and target fields
- Source field must contain the exact ID of an entity from the extracted entities list
- Target field must contain the exact ID of an entity from the extracted entities list
- Clearly define the relationship type and its directionality (source → relationship → target)
- Populate interaction_prompt according to the prompt-based requirements above
- VALIDATION: Every source and target ID MUST correspond to an existing entity.id in the entities list

INTERACTION-BASED interaction_prompt content requirements:
- For CONSUMED_BY: Extract the ACTUAL DATA CONSUMPTION MESSAGE/LOG showing how the agent processed the input data
- For PERFORMS: Extract the ACTUAL EXECUTION MESSAGE/LOG showing the agent starting or executing the task
- For ASSIGNED_TO: Extract the ACTUAL ASSIGNMENT MESSAGE/LOG showing the task being delegated to the agent
- For USES: Extract the ACTUAL TOOL USAGE MESSAGE/LOG showing the agent calling or using the tool
- For REQUIRED_BY: Extract the ACTUAL REQUIREMENT MESSAGE/LOG showing the task needing or requesting the tool
- For SUBTASK_OF: Extract the ACTUAL HIERARCHICAL MESSAGE/LOG showing the parent-child task relationship
- For NEXT: Extract the ACTUAL SEQUENCE MESSAGE/LOG showing one task following another
- For PRODUCES: Extract the ACTUAL OUTPUT GENERATION MESSAGE/LOG showing the task creating the output
- For DELIVERS_TO: Extract the ACTUAL DELIVERY MESSAGE/LOG showing the output being sent to the human
- For INTERVENES: Extract the ACTUAL INTERVENTION MESSAGE/LOG showing the human/agent correcting the task

**CRITICAL: REFERENCE-ONLY INTERACTION EXTRACTION**  
- You MUST leave the `interaction_prompt` field as an empty string "" for ALL relationships
- You MUST ONLY populate the `interaction_prompt_ref` field with location references to runtime interaction evidence
- DO NOT extract or include the actual interaction content - only identify WHERE it is located
- The actual interaction content will be extracted later by other functions using your references
- When you find interaction evidence you MUST enumerate every **contiguous occurrence** of that interaction text in the numbered trace and include one `ContentReference` object per occurrence in the `interaction_prompt_ref` list
- interaction_prompt_ref points to WHERE in the trace this specific interaction occurred (not static definitions)
- If no explicit interaction evidence exists in the trace, set interaction_prompt="" and interaction_prompt_ref=[]

Example with reference-only interaction:  
```json
{{
"type": "USES",
"source": "agent_001",
"target": "tool_001", 
"interaction_prompt": "",  // ALWAYS EMPTY - DO NOT FILL
"interaction_prompt_ref": [
    {{ "line_start": 120, "line_end": 120 }},
    {{ "line_start": 250, "line_end": 250 }}
]
}}
```

Entity type constraints (STRICT):
- CONSUMED_BY: Input→Agent
- PERFORMS: Agent→Task
- ASSIGNED_TO: Task→Agent
- USES: Agent→Tool
- REQUIRED_BY: Tool→Task
- SUBTASK_OF: Task→Task
- NEXT: Task→Task
- PRODUCES: Task→Output (only Task can produce Output)
- DELIVERS_TO: Output→Human
- INTERVENES: Agent/Human→Task (either Agent or Human can intervene in tasks)

Data flow analysis:
- For CONSUMED_BY: Track explicit and implicit inputs, consumption patterns by agents
- For PRODUCES: Track artifacts, intermediate and final outputs from tasks
- For DELIVERS_TO: Track final delivery of outputs to humans
- Identify data transformations and potential failure points

CRITICAL ID MATCHING REQUIREMENT:
- Use ONLY the exact entity.id values in source and target fields
- DO NOT use entity.name values in source/target fields
- Every relationship source/target must reference an existing entity.id
- Example: If entity has id="agent_001" and name="SQL Query Generator", use "agent_001" in relationships
- VALIDATION: Check that every source and target ID exists in the entities list before creating the relationship

Connection requirements:
Every entity MUST connect to at least one other entity. For disconnected entities:
- Agents: Create PERFORMS, CONSUMED_BY, or logical connection based on role
- Tasks: Must have PERFORMS or ASSIGNED_TO, and typically PRODUCES
- Tools: Must have USES or REQUIRED_BY
- Inputs: Must be connected via CONSUMED_BY to at least one agent
- Outputs: Must be produced by at least one task via PRODUCES, and may be delivered via DELIVERS_TO
- Humans: Connect via DELIVERS_TO or INTERVENES

If no obvious connection exists, create a logical CONSUMED_BY or PRODUCES relationship at minimum.

Interaction Prompt Extraction (Capture actual runtime interaction details):
Extract SPECIFIC interaction details that show HOW entities actually interacted during execution.
Focus on real execution context, timing, parameters, and outcomes.

PERFORMS Relationships (Agent→Task):
Extract the actual execution details:
- Task assignment: "Agent X assigned to execute Task Y at timestamp Z"
- Execution parameters: Specific inputs, configurations, constraints provided
- Execution context: Environmental conditions, dependencies, prerequisites
- Progress indicators: Status updates, intermediate results, completion signals
- Performance metrics: Timing, resource usage, success/failure indicators

USES Relationships (Agent→Tool):
Extract specific tool usage details:
- Tool invocation: Exact tool calls with parameters and context
- Usage purpose: Why the tool was needed at this specific moment
- Input/output: Specific data passed to tool and results received
- Usage patterns: Frequency, timing, conditional usage
- Error handling: Retry attempts, fallback mechanisms, error recovery

ASSIGNED_TO Relationships (Task→Agent):
Extract delegation and assignment details:
- Assignment reason: Why this specific agent was chosen for this task
- Delegation context: Who assigned, when, under what conditions
- Responsibility scope: Specific aspects of the task assigned
- Authority level: Decision-making power, escalation procedures
- Success criteria: How completion/success will be measured

CONSUMED_BY Relationships (Input→Agent):
Extract data consumption details:
- Data source: Specific input location, format, access method
- Consumption pattern: How much, how often, under what conditions
- Processing method: Transformation, validation, filtering applied by agent
- Data dependencies: Required data quality, completeness, timeliness
- Consumption triggers: Events or conditions that initiate consumption

PRODUCES Relationships (Task→Output):
Extract output generation details:
- Output specification: Exact format, structure, content requirements
- Generation process: Steps, transformations, calculations performed
- Quality control: Validation, verification, approval processes
- Delivery method: How output is provided, stored, or transmitted
- Output dependencies: Prerequisites, inputs required for generation

DELIVERS_TO/INTERVENES Relationships (Output→Human, Agent/Human→Task):
Extract human interaction details:
- Delivery method: How output reaches human (email, dashboard, report, etc.)
- Delivery criteria: When and under what conditions output is delivered
- Intervention triggers: Conditions that prompted human/agent involvement
- Feedback specifics: Exact corrections, suggestions, approvals given
- Timing context: When delivery/intervention occurred in the process
- Impact assessment: How the delivery/intervention changed the outcome

EXTRACTION PATTERNS TO LOOK FOR:
1. Execution Logs:
- "Agent X started Task Y with parameters {{...}}"
- "Tool Z called with input {{...}} returned {{...}}"
- "Task completed in X seconds with status Y"

2. Delegation Patterns:
- "Assigning Task X to Agent Y because of expertise in Z"
- "Agent Y selected for Task X due to availability and skills"

3. Data Flow Patterns:
- "Processing input data from source X with filters Y"
- "Generated output file Z with format Y containing X records"

4. Human Interaction Patterns:
- "User provided feedback: 'This needs more detail'"
- "Human approval received for proceeding with approach X"

5. Tool Usage Patterns:
- "Executing SQL query on database X with timeout Y"
- "API call to service X with parameters Y returned status Z"

FORMATTING REQUIREMENTS:
- Include timestamps when available
- Preserve parameter names and values
- Include status codes, error messages, success indicators
- Maintain data format specifications
- Show actual values, not generic placeholders

RELATIONSHIP ID MATCHING EXAMPLES:

Given these entities from the previous step:
- Entity 1: {{id: "input_001", name: "Spend Database Schema", type: "Input"}}
- Entity 2: {{id: "agent_001", name: "SQL Query Generator", type: "Agent"}}
- Entity 3: {{id: "task_001", name: "Generate Spend Analysis", type: "Task"}}
- Entity 4: {{id: "output_001", name: "Analysis Report", type: "Output"}}
- Entity 5: {{id: "human_001", name: "Business Analyst", type: "Human"}}

CORRECT relationships:
```
{{
    source: "input_001",  // Use exact entity.id from entity list
    target: "agent_001",  // Use exact entity.id from entity list
    type: "CONSUMED_BY"
}}
{{
    source: "agent_001",
    target: "task_001", 
    type: "PERFORMS"
}}
{{
    source: "output_001",
    target: "human_001",  // Use "human_001", NOT "skandha.tandra@unilever.com"
    type: "DELIVERS_TO"
}}
```

INCORRECT relationships (will cause graph errors):
```
{{
    source: "Spend Database Schema",  // WRONG: using entity.name
    target: "SQL Query Generator",   // WRONG: using entity.name
    type: "CONSUMED_BY"
}}
{{
    source: "output_001",
    target: "skandha.tandra@unilever.com",  // WRONG: using email/content, not entity.id
    type: "DELIVERS_TO"
}}
```

COMPLETE REFERENCE-ONLY Examples with interaction_prompt_ref:

```json
// CONSUMED_BY example (Reference to Data Consumption Location)
{{  
"type": "CONSUMED_BY",
"source": "input_001",
"target": "agent_001",
"interaction_prompt": "",  // ALWAYS EMPTY - DO NOT FILL
"interaction_prompt_ref": [
    {{"line_start": 45, "line_end": 45}}
]
}}
```

```json
// USES example (Reference to Tool Usage Location)  
{{
"type": "USES",
"source": "agent_001", 
"target": "tool_001",
"interaction_prompt": "",  // ALWAYS EMPTY - DO NOT FILL
"interaction_prompt_ref": [
    {{"line_start": 89, "line_end": 91}}
]
}}
```

```json
// PERFORMS example (Reference to Task Execution Location)
{{
"type": "PERFORMS",
"source": "agent_001",
"target": "task_001", 
"interaction_prompt": "",  // ALWAYS EMPTY - DO NOT FILL
"interaction_prompt_ref": [
    {{"line_start": 67, "line_end": 67}}    
]
}}
```

```json
// DELIVERS_TO example (Reference to Output Delivery Location)
{{
"type": "DELIVERS_TO",
"source": "output_001",
"target": "human_001",
"interaction_prompt": "",  // ALWAYS EMPTY - DO NOT FILL
"interaction_prompt_ref": [
    {{"line_start": 123, "line_end": 123}}
]
}}
```

```json
// INTERVENES example (Reference to Human Intervention Location)
{{
"type": "INTERVENES",
"source": "human_001",
"target": "task_001",
"interaction_prompt": "",  // ALWAYS EMPTY - DO NOT FILL
"interaction_prompt_ref": [
    {{"line_start": 156, "line_end": 156}}
]
}}  
```

- 'PRODUCES' relationships must only originate from 'Task' entities. Do NOT create 'PRODUCES' relationships from 'Agent' or 'Tool' entities. If such a relationship is detected, reassign it to the appropriate Task or remove it.
- 'CONSUMED_BY' relationships must only go from 'Input' to 'Agent'. Do NOT create reverse relationships.
- 'DELIVERS_TO' relationships must only go from 'Output' to 'Human'.

FINAL VALIDATION CHECKLIST:
Before submitting relationships, verify:
1. Every source field contains an exact entity.id from the entities list (format: TYPE_NUMBER)
2. Every target field contains an exact entity.id from the entities list (format: TYPE_NUMBER)
3. No source or target field contains entity names, descriptions, emails, or actual content
4. All relationship types are from the approved list of 10 types
5. Source/target entity types match the constraints for each relationship type
6. SPECIFIC CHECK: No email addresses (like "skandha.tandra@unilever.com") in source/target fields
7. SPECIFIC CHECK: All human references use "human_001", "human_002", etc., not actual names or emails
8. CRITICAL CHECK: For ALL relationships, interaction_prompt MUST be empty string "" - only populate interaction_prompt_ref with location references
9. CRITICAL CHECK: interaction_prompt_ref should point to ACTUAL RUNTIME MESSAGES/LOGS locations, not static prompt definitions or specifications

IMPORTANCE ASSESSMENT REQUIREMENTS:
For each relationship, you MUST assign an importance level based on its role in the system:

HIGH IMPORTANCE:
- Critical data flows that are essential for system operation
- Core agent-task assignments that drive main functionality
- Essential tool usage that multiple workflows depend on
- Primary input consumption that initiates key processes
- Final output delivery to key stakeholders
- Critical intervention relationships that prevent failures

MEDIUM IMPORTANCE:
- Standard operational workflows and data processing
- Common agent-task interactions in normal operation
- Regular tool usage that supports functionality
- Secondary input processing that provides context
- Intermediate output generation for downstream processes
- Routine human interactions and feedback loops

LOW IMPORTANCE:
- Auxiliary connections with minimal system impact
- Optional workflow steps that can be skipped
- Rarely used tool interactions or utilities
- Diagnostic or logging data flows
- Backup or redundant relationships
- Occasional human oversight or monitoring

# ASSESSMENT GUIDELINES:
# - Consider the relationship's criticality to system success
# - Evaluate how often this interaction occurs
# - Assess the impact if this relationship failed
# - Look at whether this connection is replaceable
# - Consider the consequences of removing this relationship
"""

    # Persona fields for the agent, kept as plain locals.
    agent_role = "Relationship Analyzer"
    agent_goal = "Discover standard relationships between entities using exact entity IDs and predefined relationship types"
    agent_backstory = """You are an expert in understanding relationships and connections between entities.
You can identify when agents delegate tasks, use tools, ask questions of each other, or work
together on tasks from various data sources including logs, documentation, model cards, or natural language descriptions.

You strictly adhere to using only the ten predefined relationship types (CONSUMED_BY, PERFORMS, ASSIGNED_TO, USES, 
REQUIRED_BY, SUBTASK_OF, NEXT, PRODUCES, DELIVERS_TO, INTERVENES) and never create custom relationship types. You maintain the correct source and target entity types
for each relationship as defined in the system.

CRITICAL SKILL: You are meticulous about using exact entity.id values (not names) in relationship source and target fields.
You understand that using entity names instead of IDs will break the knowledge graph visualization and cause system errors.
You always double-check that every source and target ID corresponds to an actual entity from the extracted entities list.

You clearly distinguish between:
- PERFORMS (Agent→Task): When an agent actually executes/carries out a task
- ASSIGNED_TO (Task→Agent): When a task is delegated/assigned to an agent as a responsibility

For relationships requiring prompts, you extract the appropriate prompt-based content. For relationships not requiring prompts,
you leave the interaction_prompt field empty.

You see patterns in interactions that others might miss, making you
essential for mapping the complex web of relationships in multi-agent systems,
regardless of how the system information is presented."""
    expected_output = "A list of properly structured relationships with exact entity references"

    persona_prompt = create_system_prompt(
        role=agent_role,
        backstory=agent_backstory,
        goal=agent_goal,
    )
    task_prompt = create_task_prompt(
        task_description=instruction_prompt,
        task_expected_output=expected_output,
        response_format=RelationshipExtractionList,
        previous_step_output=previous_step_output,
    )

    analysis = await run_agent(
        model_name=os.environ["OPENAI_MODEL_NAME"],
        system_prompt=persona_prompt,
        task_description=task_prompt,
        response_format=RelationshipExtractionList,
    )
    return analysis


async def knowledge_graph_builder(input_data, previous_step_output, context_documents="") -> KnowledgeGraph:
    """Assemble the final knowledge graph, retrying while relations dangle.

    The agent is run up to three times. A result is accepted as soon as every
    relation's ``source`` and ``target`` are ids of entities in the same
    graph. Each rejected graph is appended to ``incorrect_results`` in the
    payload given to the next attempt.

    Args:
        input_data: Trace window text interpolated into the instruction prompt.
        previous_step_output: Mapping holding the earlier stages' results. It
            is unpacked with ``**`` into a fresh dict, so the caller's object
            is not mutated.
        context_documents: Context text interpolated into the instruction
            prompt.

    Returns:
        The first graph whose relations all reference known entity ids. If no
        attempt succeeds, a copy of the last graph with the dangling relations
        removed.

    Raises:
        KeyError: If the ``OPENAI_MODEL_NAME`` environment variable is unset.
    """
    # Literal braces are doubled ({{ }}) because this is an f-string.
    GRAPH_BUILDER_INSTRUCTION_PROMPT = f"""
**CONTEXT DOCUMENTS AVAILABLE:**
The following context documents are available to enhance your understanding:
- {context_documents}

**PRIMARY INPUT DATA:**
Here is the input window you are analysing (with <L#> line numbers):
- {input_data}
**CONTEXT-ENHANCED KNOWLEDGE GRAPH CONSTRUCTION:**
Use the provided context documents to:
1. Create more accurate system names and summaries based on domain knowledge
2. Apply domain-specific importance assessments
3. Follow any provided guidelines for knowledge graph structure
4. Reference examples for system categorization and analysis
5. Incorporate business domain understanding into failure detection
Construct a unified knowledge graph from analyzed entities, relationships, **and detected failures**.

FAILURE LIST REQUIREMENT (YOU must perform this detection):
- Add a top-level field called `failures` (array) to the final JSON.
- Each item must match the `Failure` schema (id, risk_type, description, raw_text, raw_text_ref, affected_id).
- Use the following predefined risk_type values only: AGENT_ERROR, PLANNING_ERROR, EXECUTION_ERROR, RETRIEVAL_ERROR, HALLUCINATION.
- For every distinct mistake or risk you identify in this window, create exactly one Failure object with **all** occurrences referenced via `raw_text_ref`.
- Leave `raw_text` empty "" and rely on `raw_text_ref` for extraction (same convention as prompts).
- `affected_id` should point to the entity or relation most responsible, if applicable; otherwise leave null.
**MANDATORY**: If this window shows *any* error, bug, or incorrect behaviour you **MUST** add at least one Failure object. Unit-tests will fail if the `failures` array is missing or empty.
IF ANY SUCH KEYWORD APPEARS AND THERE IS NO FAILURE OBJECT, THE OUTPUT WILL BE REJECTED.
QUICK CHECKLIST BEFORE YOU SUBMIT:
1. `failures` array exists in top-level JSON.
2. Each Failure has at least one `raw_text_ref` entry.
3. Failure IDs follow sequential `failure_001`, `failure_002`, … order.
4. The first entry in `raw_text_ref` (index 0) must occur **on or before** the dataset's `mistake_step` line.
- The **primary evidence** for a Failure must be the **exact agent message** at the first mistake step—the line where the incorrect answer or erroneous action first appears. Do NOT rely solely on later diagnostic logs.
- Typical evidence keywords include: "ERROR", "Incorrect answer", "Traceback", "I cannot", "Failed to". Capture that specific message line via `raw_text_ref`.
CRITICAL FIRST-SYMPTOM LINE RULE
• The *very first* line that shows the mistake MUST be captured via `raw_text_ref`.
• "First line" means the earliest agent or tool message whose content already demonstrates the error.
• Typical trigger words to scan for: "error", "incorrect", "failed", "traceback", "cannot", "exception", "invalid".
• Mini-example (multi-line traceback):
    assistant: Traceback (most recent call last)
    assistant:   File "...", line 12, in <module>
    assistant:   ValueError: division by zero   ← only this FIRST offending line is referenced
  Correct `raw_text_ref` → `[{{"line_start": 2, "line_end": 2}}]`
Example Failure object:
```json
{{
  "id": "failure_001",
  "risk_type": "AGENT_ERROR",
  "description": "Agent provided incorrect SQL syntax causing downstream failure",
  "raw_text": "",
  "raw_text_ref": [{{"line_start": 42, "line_end": 43}}],
  "affected_id": "agent_001"
}}
```
Core requirements:
1. Integrate entities and relationships into a coherent structure
2. Maintain consistent entity references
3. Use ONLY the ten predefined relation types
4. Preserve all prompt content and importance assessments
5. Include metadata with timestamp and statistics
6. Create a descriptive system name (3-7 words)
7. Write a concise 2-3 sentence system summary
8. Include comprehensive system assessment

System naming guidelines:
- Reflect primary purpose and function
- Include key agent roles
- Mention domain/industry if applicable
- Highlight distinctive capabilities

Example names: "Financial Research Collaboration Network", "Customer Support Ticket Triage System"

System summary must explain:
- What the system does (purpose/function)
- How it works (agent coordination pattern)
- Value provided (problem solved)

Example summary: "This system analyzes customer support tickets using a classifier agent and specialist agents to route issues to appropriate departments. It manages workflow and handoffs between specialists. The system reduces response time by matching issues with qualified representatives."

Validation requirements:
1. Include ONLY these relationship types:
   - CONSUMED_BY: Input→Agent
   - PERFORMS: Agent→Task
   - ASSIGNED_TO: Task→Agent
   - USES: Agent→Tool
   - REQUIRED_BY: Tool→Task
   - SUBTASK_OF: Task→Task
   - NEXT: Task→Task (sequence)
   - PRODUCES: Task→Output
   - DELIVERS_TO: Output→Human
   - INTERVENES: Agent/Human→Task
   
2. Confirm task relationships accurately show:
   - Sequential dependencies (NEXT)
   - Hierarchical structure (SUBTASK_OF)
   
3. Verify entity IDs (not names) in all relationships

Connectivity validation:
- All entities must connect to at least one other entity
- All inputs must be consumed by agents via CONSUMED_BY
- All outputs must be produced by tasks via PRODUCES
- All outputs should be delivered to humans via DELIVERS_TO when applicable
- All components must be reachable (no isolated subgraphs)
- Clear paths must exist from inputs to outputs through agents and tasks
- All agents must have defined roles
- Document any added connections in metadata.connectivity_fixups

System Integration:
- Focus on comprehensive system analysis and assessment
- Include detailed metadata about system components and interactions
- Document system architecture patterns and design decisions

System assessment:
1. Evaluate overall system importance (HIGH/MEDIUM/LOW) based on:
   - Component count and centrality
   - Workflow centrality
   - Uniqueness/replaceability
   - Failure impact
   - Single points of failure
   - Usage frequency
   
2. Provide 3-5 sentence justification covering:
   - Importance level rationale
   - Key assessment factors
   - Architecture strengths/vulnerabilities
   - Risk mitigations
   - Comparison to similar systems

Data flow analysis:
- Map input consumption paths
- Track output production and utilization
- Identify transformation points
- Document critical data paths
- Highlight bottlenecks and redundancies

Output a complete KnowledgeGraph object with entities, relations, metadata, system_name, and system_summary.
"""

    knowledge_graph_builder_agent_info = {
        "role": "Knowledge Graph Builder",
        "goal": "Build a complete, consistent knowledge graph using extracted entities and relationships with proper validation",
        "backstory": """You are skilled at organizing information into structured knowledge graphs.
You understand how to represent entities and relationships in a way that captures the essence
of a system. Your knowledge graphs are well-structured, consistent, and follow best practices
for knowledge representation. 

You excel at analyzing complex systems holistically to provide overall risk assessments.
You can evaluate the criticality of entire systems based on their components, dependencies,
and role in broader workflows. Your system-level risk analyses help stakeholders understand
key vulnerabilities and critical components that warrant special attention.

You ensure the final output is in a format that can be easily used for further analysis or visualization.""",
        "task_expected_output": "A complete knowledge graph with entities, relationships, failures, and metadata",
    }

    system_prompt = create_system_prompt(
        role=knowledge_graph_builder_agent_info["role"],
        backstory=knowledge_graph_builder_agent_info["backstory"],
        goal=knowledge_graph_builder_agent_info["goal"],
    )
    # Fresh dict: the caller's mapping is left untouched while rejected
    # attempts accumulate under "incorrect_results".
    full_previous_step_output = {
        **previous_step_output,
        "incorrect_results": [],
    }

    max_attempts = 3
    kg = None
    valid_relations = []
    for _ in range(max_attempts):
        if kg is not None:
            # Show the model the graph that was just rejected.
            # NOTE(review): the dump is later passed to json.dumps; presumably
            # every field value is JSON-serialisable -- confirm.
            full_previous_step_output["incorrect_results"].append(kg.model_dump())

        task_description = create_task_prompt(
            task_description=GRAPH_BUILDER_INSTRUCTION_PROMPT,
            task_expected_output=knowledge_graph_builder_agent_info["task_expected_output"],
            response_format=KnowledgeGraph,
            previous_step_output=json.dumps(full_previous_step_output, indent=2),
        )

        kg = await run_agent(
            model_name=os.environ["OPENAI_MODEL_NAME"],
            system_prompt=system_prompt,
            task_description=task_description,
            response_format=KnowledgeGraph,
        )

        # A set gives O(1) membership tests; assumes entity ids are hashable
        # (the prompts describe string ids such as "agent_001").
        entity_ids = {entity.id for entity in kg.entities}
        relations = list(kg.relations)
        valid_relations = [
            rel
            for rel in relations
            if rel.source in entity_ids and rel.target in entity_ids
        ]
        if len(valid_relations) == len(relations):
            # Every relation points at known entities: accept this graph.
            return kg

    # No attempt was fully consistent: keep the last graph but drop the
    # relations that reference unknown entity ids.
    return kg.model_copy(update={"relations": valid_relations})

async def run(input_data, context: str = None) -> KnowledgeGraph:
    """Execute the three extraction stages in order and return the final graph.

    The stages are entity extraction, relationship analysis and knowledge-graph
    construction. ``input_data`` and ``context`` are forwarded unchanged to
    each stage.
    """
    entities = await entity_extractor(input_data, context)

    # The relationship stage is given the entities as a JSON string.
    entities_json = json.dumps({"entities": entities.model_dump()}, indent=2)
    relations = await relationship_analyzer(input_data, entities_json, context)

    # The builder stage is given a plain dict rather than a JSON string.
    builder_input = {
        "entities": entities.model_dump(),
        "relations": relations.model_dump(),
    }
    return await knowledge_graph_builder(input_data, builder_input, context)