wu981526092 committed on
Commit
ea56a51
·
1 Parent(s): 232e016
agentgraph/methods/production/openai_structured_extractor.py CHANGED
@@ -54,12 +54,17 @@ class OpenAIStructuredExtractor:
54
  """
55
  logger.info(f"Starting knowledge graph extraction for {len(input_data)} characters of input")
56
 
57
- # System prompt for direct KnowledgeGraph extraction with content references
58
- system_prompt = """You are an expert at analyzing agent system traces and extracting knowledge graphs with precise content references.
59
 
60
- The input may contain line markers like <L1>, <L2>, etc. Use these to create accurate content references when available.
61
 
62
- Extract a knowledge graph with these entity types:
 
 
 
 
 
63
  - Agent: AI agents with specific roles
64
  - Task: Specific tasks or objectives
65
  - Tool: Tools or functions used
@@ -67,7 +72,7 @@ Extract a knowledge graph with these entity types:
67
  - Output: Data outputs from the system
68
  - Human: Human users or stakeholders
69
 
70
- Use these relationship types:
71
  - CONSUMED_BY: Input→Agent
72
  - PERFORMS: Agent→Task
73
  - ASSIGNED_TO: Task→Agent
@@ -79,28 +84,27 @@ Use these relationship types:
79
  - DELIVERS_TO: Output→Human
80
  - INTERVENES: Agent/Human→Task
81
 
 
82
  For each entity provide:
83
- - id: unique identifier (generate if needed)
84
  - type: one of the types above
85
  - name: descriptive name
86
  - importance: HIGH, MEDIUM, or LOW
87
  - raw_prompt: actual prompt/specification content that defines this entity
88
- - raw_prompt_ref: list of content references with line_start and line_end (if line markers available)
89
 
90
  For each relation provide:
91
  - id: unique identifier
92
- - source: source entity id
93
- - target: target entity id
94
  - type: one of the types above
95
  - importance: HIGH, MEDIUM, or LOW
96
  - interaction_prompt: runtime evidence showing this relationship occurred
97
- - interaction_prompt_ref: list of content references (if line markers available)
98
-
99
- Provide system_name and system_summary for the overall system.
100
 
101
- Focus on extracting the actual workflow with meaningful entities and relationships."""
102
 
103
- user_prompt = f"Analyze this agent system trace and extract a knowledge graph:\n\n{input_data}"
 
104
 
105
  try:
106
  response = self.client.responses.parse(
 
54
  """
55
  logger.info(f"Starting knowledge graph extraction for {len(input_data)} characters of input")
56
 
57
+ # System prompt - focus on your role and methodology
58
+ system_prompt = """You are an expert knowledge graph analyst specializing in agent system traces.
59
 
60
+ Your task is to extract structured knowledge graphs from agent execution traces. You identify entities (Agents, Tasks, Tools, Inputs, Outputs, Humans) and their relationships, providing precise content references when line markers are available.
61
 
62
+ You always return a complete knowledge graph with meaningful entities, logical relationships, and accurate metadata."""
63
+
64
+ # User prompt - specific instructions and data
65
+ user_prompt = f"""Analyze this agent system trace and extract a knowledge graph with the following specifications:
66
+
67
+ ENTITY TYPES:
68
  - Agent: AI agents with specific roles
69
  - Task: Specific tasks or objectives
70
  - Tool: Tools or functions used
 
72
  - Output: Data outputs from the system
73
  - Human: Human users or stakeholders
74
 
75
+ RELATIONSHIP TYPES:
76
  - CONSUMED_BY: Input→Agent
77
  - PERFORMS: Agent→Task
78
  - ASSIGNED_TO: Task→Agent
 
84
  - DELIVERS_TO: Output→Human
85
  - INTERVENES: Agent/Human→Task
86
 
87
+ REQUIREMENTS:
88
  For each entity provide:
89
+ - id: unique identifier
90
  - type: one of the types above
91
  - name: descriptive name
92
  - importance: HIGH, MEDIUM, or LOW
93
  - raw_prompt: actual prompt/specification content that defines this entity
94
+ - raw_prompt_ref: list of content references with line_start and line_end (use <L#> markers if available)
95
 
96
  For each relation provide:
97
  - id: unique identifier
98
+ - source/target: entity IDs
 
99
  - type: one of the types above
100
  - importance: HIGH, MEDIUM, or LOW
101
  - interaction_prompt: runtime evidence showing this relationship occurred
102
+ - interaction_prompt_ref: list of content references (use <L#> markers if available)
 
 
103
 
104
+ Also provide system_name and system_summary for the overall system.
105
 
106
+ TRACE DATA:
107
+ {input_data}"""
108
 
109
  try:
110
  response = self.client.responses.parse(
agentgraph/shared/models/reference_based/content_reference.py CHANGED
@@ -5,42 +5,12 @@ from typing import Optional
5
  class ContentReference(BaseModel):
6
  """
7
  Reference to content location in the original trace using line numbers and character positions.
8
- This allows AI agents to provide position metadata instead of full content, enabling
9
- efficient mapping back to the original trace while reducing hallucination risks.
10
-
11
- CRITICAL FOR LLMs: Line counting accuracy is essential for proper content resolution.
12
- Use systematic counting methods and verify your line numbers before submission.
13
  """
14
  line_start: Optional[int] = Field(None,
15
- description="""Starting line number where the content begins (1-based indexing from <L1>, <L2>... markers).
16
-
17
- ACCURACY REQUIREMENTS FOR LLMs:
18
- - Count <L#> markers systematically from the beginning of the input
19
- - Use anchor points: find distinctive text first, then count nearby lines
20
- - Double-check by counting backwards from a known reference point
21
- - For multi-line content, this should be the FIRST line containing the content
22
- - In key-value pairs (e.g. "content": "..."), reference the line where the VALUE starts, not the key
23
-
24
- COMMON ERRORS TO AVOID:
25
- - Miscounting due to skipping indented continuation lines
26
- - Confusing line numbers when content spans multiple <L#> markers
27
- - Using approximate counting instead of precise marker identification
28
-
29
- VERIFICATION: Before submitting, locate your chosen line number and confirm it contains the expected content start."""
30
  )
31
  line_end: Optional[int] = Field(None,
32
- description="""Ending line number where content ends (1-based indexing from <L1>, <L2>... markers).
33
-
34
- ACCURACY REQUIREMENTS FOR LLMs:
35
- - Must be >= line_start (validation will fail otherwise)
36
- - For single-line content, line_end should equal line_start
37
- - For multi-line content, find the LAST line containing the content
38
- - Include indented continuation lines that are part of the same logical content block
39
-
40
- VERIFICATION STRATEGY:
41
- - Count from line_start to ensure proper range
42
- - Confirm the line_end marker contains the actual end of the content
43
- - Check that no content continues beyond your specified line_end"""
44
  )
45
 
46
  def validate_line_range(self) -> bool:
 
5
  class ContentReference(BaseModel):
6
  """
7
  Reference to content location in the original trace using line numbers and character positions.
 
 
 
 
 
8
  """
9
  line_start: Optional[int] = Field(None,
10
+ description="""Starting line number where the content begins (1-based indexing from <L1>, <L2>... markers)."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  )
12
  line_end: Optional[int] = Field(None,
13
+ description="""Ending line number where content ends (1-based indexing from <L1>, <L2>... markers)."""
 
 
 
 
 
 
 
 
 
 
 
14
  )
15
 
16
  def validate_line_range(self) -> bool: