Spaces:
Running
Running
Commit
·
ea56a51
1
Parent(s):
232e016
add
Browse files
agentgraph/methods/production/openai_structured_extractor.py
CHANGED
|
@@ -54,12 +54,17 @@ class OpenAIStructuredExtractor:
|
|
| 54 |
"""
|
| 55 |
logger.info(f"Starting knowledge graph extraction for {len(input_data)} characters of input")
|
| 56 |
|
| 57 |
-
# System prompt
|
| 58 |
-
system_prompt = """You are an expert
|
| 59 |
|
| 60 |
-
|
| 61 |
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
- Agent: AI agents with specific roles
|
| 64 |
- Task: Specific tasks or objectives
|
| 65 |
- Tool: Tools or functions used
|
|
@@ -67,7 +72,7 @@ Extract a knowledge graph with these entity types:
|
|
| 67 |
- Output: Data outputs from the system
|
| 68 |
- Human: Human users or stakeholders
|
| 69 |
|
| 70 |
-
|
| 71 |
- CONSUMED_BY: Input→Agent
|
| 72 |
- PERFORMS: Agent→Task
|
| 73 |
- ASSIGNED_TO: Task→Agent
|
|
@@ -79,28 +84,27 @@ Use these relationship types:
|
|
| 79 |
- DELIVERS_TO: Output→Human
|
| 80 |
- INTERVENES: Agent/Human→Task
|
| 81 |
|
|
|
|
| 82 |
For each entity provide:
|
| 83 |
-
- id: unique identifier
|
| 84 |
- type: one of the types above
|
| 85 |
- name: descriptive name
|
| 86 |
- importance: HIGH, MEDIUM, or LOW
|
| 87 |
- raw_prompt: actual prompt/specification content that defines this entity
|
| 88 |
-
- raw_prompt_ref: list of content references with line_start and line_end (
|
| 89 |
|
| 90 |
For each relation provide:
|
| 91 |
- id: unique identifier
|
| 92 |
-
- source: source entity id
|
| 93 |
-
- target: target entity id
|
| 94 |
- type: one of the types above
|
| 95 |
- importance: HIGH, MEDIUM, or LOW
|
| 96 |
- interaction_prompt: runtime evidence showing this relationship occurred
|
| 97 |
-
- interaction_prompt_ref: list of content references (
|
| 98 |
-
|
| 99 |
-
Provide system_name and system_summary for the overall system.
|
| 100 |
|
| 101 |
-
|
| 102 |
|
| 103 |
-
|
|
|
|
| 104 |
|
| 105 |
try:
|
| 106 |
response = self.client.responses.parse(
|
|
|
|
| 54 |
"""
|
| 55 |
logger.info(f"Starting knowledge graph extraction for {len(input_data)} characters of input")
|
| 56 |
|
| 57 |
+
# System prompt - focus on your role and methodology
|
| 58 |
+
system_prompt = """You are an expert knowledge graph analyst specializing in agent system traces.
|
| 59 |
|
| 60 |
+
Your task is to extract structured knowledge graphs from agent execution traces. You identify entities (Agents, Tasks, Tools, Inputs, Outputs, Humans) and their relationships, providing precise content references when line markers are available.
|
| 61 |
|
| 62 |
+
You always return a complete knowledge graph with meaningful entities, logical relationships, and accurate metadata."""
|
| 63 |
+
|
| 64 |
+
# User prompt - specific instructions and data
|
| 65 |
+
user_prompt = f"""Analyze this agent system trace and extract a knowledge graph with the following specifications:
|
| 66 |
+
|
| 67 |
+
ENTITY TYPES:
|
| 68 |
- Agent: AI agents with specific roles
|
| 69 |
- Task: Specific tasks or objectives
|
| 70 |
- Tool: Tools or functions used
|
|
|
|
| 72 |
- Output: Data outputs from the system
|
| 73 |
- Human: Human users or stakeholders
|
| 74 |
|
| 75 |
+
RELATIONSHIP TYPES:
|
| 76 |
- CONSUMED_BY: Input→Agent
|
| 77 |
- PERFORMS: Agent→Task
|
| 78 |
- ASSIGNED_TO: Task→Agent
|
|
|
|
| 84 |
- DELIVERS_TO: Output→Human
|
| 85 |
- INTERVENES: Agent/Human→Task
|
| 86 |
|
| 87 |
+
REQUIREMENTS:
|
| 88 |
For each entity provide:
|
| 89 |
+
- id: unique identifier
|
| 90 |
- type: one of the types above
|
| 91 |
- name: descriptive name
|
| 92 |
- importance: HIGH, MEDIUM, or LOW
|
| 93 |
- raw_prompt: actual prompt/specification content that defines this entity
|
| 94 |
+
- raw_prompt_ref: list of content references with line_start and line_end (use <L#> markers if available)
|
| 95 |
|
| 96 |
For each relation provide:
|
| 97 |
- id: unique identifier
|
| 98 |
+
- source/target: entity IDs
|
|
|
|
| 99 |
- type: one of the types above
|
| 100 |
- importance: HIGH, MEDIUM, or LOW
|
| 101 |
- interaction_prompt: runtime evidence showing this relationship occurred
|
| 102 |
+
- interaction_prompt_ref: list of content references (use <L#> markers if available)
|
|
|
|
|
|
|
| 103 |
|
| 104 |
+
Also provide system_name and system_summary for the overall system.
|
| 105 |
|
| 106 |
+
TRACE DATA:
|
| 107 |
+
{input_data}"""
|
| 108 |
|
| 109 |
try:
|
| 110 |
response = self.client.responses.parse(
|
agentgraph/shared/models/reference_based/content_reference.py
CHANGED
|
@@ -5,42 +5,12 @@ from typing import Optional
|
|
| 5 |
class ContentReference(BaseModel):
|
| 6 |
"""
|
| 7 |
Reference to content location in the original trace using line numbers and character positions.
|
| 8 |
-
This allows AI agents to provide position metadata instead of full content, enabling
|
| 9 |
-
efficient mapping back to the original trace while reducing hallucination risks.
|
| 10 |
-
|
| 11 |
-
CRITICAL FOR LLMs: Line counting accuracy is essential for proper content resolution.
|
| 12 |
-
Use systematic counting methods and verify your line numbers before submission.
|
| 13 |
"""
|
| 14 |
line_start: Optional[int] = Field(None,
|
| 15 |
-
description="""Starting line number where the content begins (1-based indexing from <L1>, <L2>... markers).
|
| 16 |
-
|
| 17 |
-
ACCURACY REQUIREMENTS FOR LLMs:
|
| 18 |
-
- Count <L#> markers systematically from the beginning of the input
|
| 19 |
-
- Use anchor points: find distinctive text first, then count nearby lines
|
| 20 |
-
- Double-check by counting backwards from a known reference point
|
| 21 |
-
- For multi-line content, this should be the FIRST line containing the content
|
| 22 |
-
- In key-value pairs (e.g. "content": "..."), reference the line where the VALUE starts, not the key
|
| 23 |
-
|
| 24 |
-
COMMON ERRORS TO AVOID:
|
| 25 |
-
- Miscounting due to skipping indented continuation lines
|
| 26 |
-
- Confusing line numbers when content spans multiple <L#> markers
|
| 27 |
-
- Using approximate counting instead of precise marker identification
|
| 28 |
-
|
| 29 |
-
VERIFICATION: Before submitting, locate your chosen line number and confirm it contains the expected content start."""
|
| 30 |
)
|
| 31 |
line_end: Optional[int] = Field(None,
|
| 32 |
-
description="""Ending line number where content ends (1-based indexing from <L1>, <L2>... markers).
|
| 33 |
-
|
| 34 |
-
ACCURACY REQUIREMENTS FOR LLMs:
|
| 35 |
-
- Must be >= line_start (validation will fail otherwise)
|
| 36 |
-
- For single-line content, line_end should equal line_start
|
| 37 |
-
- For multi-line content, find the LAST line containing the content
|
| 38 |
-
- Include indented continuation lines that are part of the same logical content block
|
| 39 |
-
|
| 40 |
-
VERIFICATION STRATEGY:
|
| 41 |
-
- Count from line_start to ensure proper range
|
| 42 |
-
- Confirm the line_end marker contains the actual end of the content
|
| 43 |
-
- Check that no content continues beyond your specified line_end"""
|
| 44 |
)
|
| 45 |
|
| 46 |
def validate_line_range(self) -> bool:
|
|
|
|
| 5 |
class ContentReference(BaseModel):
|
| 6 |
"""
|
| 7 |
Reference to content location in the original trace using line numbers and character positions.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
"""
|
| 9 |
line_start: Optional[int] = Field(None,
|
| 10 |
+
description="""Starting line number where the content begins (1-based indexing from <L1>, <L2>... markers)."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
)
|
| 12 |
line_end: Optional[int] = Field(None,
|
| 13 |
+
description="""Ending line number where content ends (1-based indexing from <L1>, <L2>... markers)."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
)
|
| 15 |
|
| 16 |
def validate_line_range(self) -> bool:
|