wu981526092 committed on
Commit
ea56a51
·
1 Parent(s): 232e016
agentgraph/methods/production/openai_structured_extractor.py CHANGED
@@ -54,12 +54,17 @@ class OpenAIStructuredExtractor:
54
  """
55
  logger.info(f"Starting knowledge graph extraction for {len(input_data)} characters of input")
56
 
57
- # System prompt for direct KnowledgeGraph extraction with content references
58
- system_prompt = """You are an expert at analyzing agent system traces and extracting knowledge graphs with precise content references.
59
 
60
- The input may contain line markers like <L1>, <L2>, etc. Use these to create accurate content references when available.
61
 
62
- Extract a knowledge graph with these entity types:
 
 
 
 
 
63
  - Agent: AI agents with specific roles
64
  - Task: Specific tasks or objectives
65
  - Tool: Tools or functions used
@@ -67,7 +72,7 @@ Extract a knowledge graph with these entity types:
67
  - Output: Data outputs from the system
68
  - Human: Human users or stakeholders
69
 
70
- Use these relationship types:
71
  - CONSUMED_BY: Input→Agent
72
  - PERFORMS: Agent→Task
73
  - ASSIGNED_TO: Task→Agent
@@ -79,28 +84,27 @@ Use these relationship types:
79
  - DELIVERS_TO: Output→Human
80
  - INTERVENES: Agent/Human→Task
81
 
 
82
  For each entity provide:
83
- - id: unique identifier (generate if needed)
84
  - type: one of the types above
85
  - name: descriptive name
86
  - importance: HIGH, MEDIUM, or LOW
87
  - raw_prompt: actual prompt/specification content that defines this entity
88
- - raw_prompt_ref: list of content references with line_start and line_end (if line markers available)
89
 
90
  For each relation provide:
91
  - id: unique identifier
92
- - source: source entity id
93
- - target: target entity id
94
  - type: one of the types above
95
  - importance: HIGH, MEDIUM, or LOW
96
  - interaction_prompt: runtime evidence showing this relationship occurred
97
- - interaction_prompt_ref: list of content references (if line markers available)
98
-
99
- Provide system_name and system_summary for the overall system.
100
 
101
- Focus on extracting the actual workflow with meaningful entities and relationships."""
102
 
103
- user_prompt = f"Analyze this agent system trace and extract a knowledge graph:\n\n{input_data}"
 
104
 
105
  try:
106
  response = self.client.responses.parse(
 
54
  """
55
  logger.info(f"Starting knowledge graph extraction for {len(input_data)} characters of input")
56
 
57
+ # System prompt - focus on your role and methodology
58
+ system_prompt = """You are an expert knowledge graph analyst specializing in agent system traces.
59
 
60
+ Your task is to extract structured knowledge graphs from agent execution traces. You identify entities (Agents, Tasks, Tools, Inputs, Outputs, Humans) and their relationships, providing precise content references when line markers are available.
61
 
62
+ You always return a complete knowledge graph with meaningful entities, logical relationships, and accurate metadata."""
63
+
64
+ # User prompt - specific instructions and data
65
+ user_prompt = f"""Analyze this agent system trace and extract a knowledge graph with the following specifications:
66
+
67
+ ENTITY TYPES:
68
  - Agent: AI agents with specific roles
69
  - Task: Specific tasks or objectives
70
  - Tool: Tools or functions used
 
72
  - Output: Data outputs from the system
73
  - Human: Human users or stakeholders
74
 
75
+ RELATIONSHIP TYPES:
76
  - CONSUMED_BY: Input→Agent
77
  - PERFORMS: Agent→Task
78
  - ASSIGNED_TO: Task→Agent
 
84
  - DELIVERS_TO: Output→Human
85
  - INTERVENES: Agent/Human→Task
86
 
87
+ REQUIREMENTS:
88
  For each entity provide:
89
+ - id: unique identifier
90
  - type: one of the types above
91
  - name: descriptive name
92
  - importance: HIGH, MEDIUM, or LOW
93
  - raw_prompt: actual prompt/specification content that defines this entity
94
+ - raw_prompt_ref: list of content references with line_start and line_end (use <L#> markers if available)
95
 
96
  For each relation provide:
97
  - id: unique identifier
98
+ - source/target: entity IDs
 
99
  - type: one of the types above
100
  - importance: HIGH, MEDIUM, or LOW
101
  - interaction_prompt: runtime evidence showing this relationship occurred
102
+ - interaction_prompt_ref: list of content references (use <L#> markers if available)
 
 
103
 
104
+ Also provide system_name and system_summary for the overall system.
105
 
106
+ TRACE DATA:
107
+ {input_data}"""
108
 
109
  try:
110
  response = self.client.responses.parse(
agentgraph/shared/models/reference_based/content_reference.py CHANGED
@@ -5,42 +5,12 @@ from typing import Optional
5
  class ContentReference(BaseModel):
6
  """
7
  Reference to content location in the original trace using line numbers and character positions.
8
- This allows AI agents to provide position metadata instead of full content, enabling
9
- efficient mapping back to the original trace while reducing hallucination risks.
10
-
11
- CRITICAL FOR LLMs: Line counting accuracy is essential for proper content resolution.
12
- Use systematic counting methods and verify your line numbers before submission.
13
  """
14
  line_start: Optional[int] = Field(None,
15
- description="""Starting line number where the content begins (1-based indexing from <L1>, <L2>... markers).
16
-
17
- ACCURACY REQUIREMENTS FOR LLMs:
18
- - Count <L#> markers systematically from the beginning of the input
19
- - Use anchor points: find distinctive text first, then count nearby lines
20
- - Double-check by counting backwards from a known reference point
21
- - For multi-line content, this should be the FIRST line containing the content
22
- - In key-value pairs (e.g. "content": "..."), reference the line where the VALUE starts, not the key
23
-
24
- COMMON ERRORS TO AVOID:
25
- - Miscounting due to skipping indented continuation lines
26
- - Confusing line numbers when content spans multiple <L#> markers
27
- - Using approximate counting instead of precise marker identification
28
-
29
- VERIFICATION: Before submitting, locate your chosen line number and confirm it contains the expected content start."""
30
  )
31
  line_end: Optional[int] = Field(None,
32
- description="""Ending line number where content ends (1-based indexing from <L1>, <L2>... markers).
33
-
34
- ACCURACY REQUIREMENTS FOR LLMs:
35
- - Must be >= line_start (validation will fail otherwise)
36
- - For single-line content, line_end should equal line_start
37
- - For multi-line content, find the LAST line containing the content
38
- - Include indented continuation lines that are part of the same logical content block
39
-
40
- VERIFICATION STRATEGY:
41
- - Count from line_start to ensure proper range
42
- - Confirm the line_end marker contains the actual end of the content
43
- - Check that no content continues beyond your specified line_end"""
44
  )
45
 
46
  def validate_line_range(self) -> bool:
 
5
  class ContentReference(BaseModel):
6
  """
7
  Reference to content location in the original trace using line numbers and character positions.
 
 
 
 
 
8
  """
9
  line_start: Optional[int] = Field(None,
10
+ description="""Starting line number where the content begins (1-based indexing from <L1>, <L2>... markers)."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  )
12
  line_end: Optional[int] = Field(None,
13
+ description="""Ending line number where content ends (1-based indexing from <L1>, <L2>... markers)."""
 
 
 
 
 
 
 
 
 
 
 
14
  )
15
 
16
  def validate_line_range(self) -> bool: