wu981526092 committed on
Commit
7e807a3
·
1 Parent(s): 7bca5b5

Add OpenAI Structured Outputs extraction method

Browse files

- Implement simple OpenAI structured outputs extractor using Pydantic models
- Register as new production method 'openai_structured' in method registry
- Support direct extraction without complex multi-agent workflow
- Generate more complex knowledge graphs with better NEXT relationships
- Include factory integration for seamless system integration
- Build frontend with updated method selection capability

agentgraph/methods/production/__init__.py CHANGED
@@ -7,8 +7,10 @@ These methods use content references and line numbers for precise content locati
7
 
8
  from . import multi_agent_knowledge_extractor
9
  from . import pydantic_multi_agent_knowledge_extractor
 
10
 
11
  __all__ = [
12
  "multi_agent_knowledge_extractor",
13
- "pydantic_multi_agent_knowledge_extractor",
 
14
  ]
 
7
 
8
  from . import multi_agent_knowledge_extractor
9
  from . import pydantic_multi_agent_knowledge_extractor
10
+ from . import openai_structured_extractor
11
 
12
  __all__ = [
13
  "multi_agent_knowledge_extractor",
14
+ "pydantic_multi_agent_knowledge_extractor",
15
+ "openai_structured_extractor",
16
  ]
agentgraph/methods/production/openai_structured_extractor.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ OpenAI Structured Outputs Knowledge Extractor
4
+
5
+ A simple, direct approach using OpenAI's structured outputs API to extract
6
+ knowledge graphs in one step using Pydantic models.
7
+ """
8
+
9
+ import os
10
+ import logging
11
+ from typing import Optional, List, Dict, Any
12
+ import uuid
13
+ from datetime import datetime
14
+ from dotenv import load_dotenv
15
+ from openai import OpenAI
16
+ from pydantic import BaseModel
17
+
18
+ # Import Pydantic models
19
+ from agentgraph.shared.models.reference_based import KnowledgeGraph, Entity, Relation
20
+
21
# Load environment variables from the project's .env file. No path is passed
# so that python-dotenv locates the file itself (it searches upward from this
# module's directory) instead of relying on an absolute path that only exists
# on one developer's machine.
load_dotenv()
23
+
24
+ # Configure logging
25
+ logger = logging.getLogger(__name__)
26
+
27
+ # Simplified models for OpenAI structured outputs
28
class SimpleEntity(BaseModel):
    # Simplified entity record used as part of the structured-output schema.
    # Documented with comments rather than a docstring so the generated JSON
    # schema stays exactly as it was.

    # Identifier of the entity.
    id: str
    # Entity category: Agent, Task, Tool, Input, Output, Human.
    type: str
    # Name of the entity.
    name: str
    # Importance level: HIGH, MEDIUM, LOW.
    importance: str
33
+
34
class SimpleRelation(BaseModel):
    # Simplified relation record used as part of the structured-output schema.

    # Identifier of the relation.
    id: str
    # Start of the relation (presumably an entity id - confirm with callers).
    source: str
    # End of the relation (presumably an entity id - confirm with callers).
    target: str
    # Relationship kind: PERFORMS, USES, etc.
    type: str
    # Importance label of the relation.
    importance: str
40
+
41
class SimpleKnowledgeGraph(BaseModel):
    # Top-level simplified schema: system description plus its graph content.

    # Name given to the analysed system.
    system_name: str
    # Summary of the analysed system.
    system_summary: str
    # Entities of the graph.
    entities: List[SimpleEntity]
    # Relations of the graph.
    relations: List[SimpleRelation]
46
+
47
# Common variations of importance labels, keyed in canonical form (upper
# case, words separated by single spaces) and mapped to the standard value.
_IMPORTANCE_ALIASES = {
    "CRITICAL": "HIGH",
    "VERY HIGH": "HIGH",
    "VERY LOW": "LOW",
    "NORMAL": "MEDIUM",
    "STANDARD": "MEDIUM",
}


def normalize_importance(importance: str) -> str:
    """Normalize importance values to HIGH/MEDIUM/LOW.

    Matching ignores case, surrounding whitespace and the separator used
    between words, so ``" high "``, ``"very_high"`` and ``"Very-Low"`` are all
    recognised.

    Args:
        importance: Importance label as produced by the model.

    Returns:
        The standard value for a known variation; otherwise the trimmed,
        upper-cased input (unknown labels are passed through, not replaced).
    """
    trimmed_upper = importance.strip().upper()
    # Canonical lookup key: treat "_" and "-" as spaces, collapse repeats.
    lookup_key = " ".join(
        trimmed_upper.replace("_", " ").replace("-", " ").split()
    )
    return _IMPORTANCE_ALIASES.get(lookup_key, trimmed_upper)
59
+
60
def convert_simple_to_full_kg(simple_kg: SimpleKnowledgeGraph) -> KnowledgeGraph:
    """Build the full KnowledgeGraph model from its simplified counterpart.

    Importance labels are passed through ``normalize_importance``; prompt
    text and prompt references are left empty, and failures/optimizations
    are set to ``None`` because this simple method does not generate them.

    Args:
        simple_kg: Simplified graph returned by the structured-output call.

    Returns:
        KnowledgeGraph: Full model carrying the same entities and relations.
    """
    full_entities = [
        Entity(
            id=node.id,
            type=node.type,
            name=node.name,
            importance=normalize_importance(node.importance),
            raw_prompt="",  # Empty as per requirements
            raw_prompt_ref=[],  # Empty for now
        )
        for node in simple_kg.entities
    ]

    full_relations = [
        Relation(
            id=edge.id,
            source=edge.source,
            target=edge.target,
            type=edge.type,
            importance=normalize_importance(edge.importance),
            interaction_prompt="",  # Empty as per requirements
            interaction_prompt_ref=[],  # Empty for now
        )
        for edge in simple_kg.relations
    ]

    return KnowledgeGraph(
        system_name=simple_kg.system_name,
        system_summary=simple_kg.system_summary,
        entities=full_entities,
        relations=full_relations,
        failures=None,  # Not generated by this simple method
        optimizations=None,  # Not generated by this simple method
    )
101
+
102
class OpenAIStructuredExtractor:
    """
    Simple knowledge graph extractor using OpenAI's structured outputs.

    The OpenAI client is created on first access to ``client`` rather than in
    ``__init__``. Constructing the client raises when no API key is
    configured, and this module builds an extractor at import time through its
    exported factory instance, so eager construction would make the import
    itself fail.
    """

    # System message sent with every extraction request.
    _SYSTEM_PROMPT = """You are an expert at analyzing agent system traces and extracting knowledge graphs.

Extract a knowledge graph with these entity types:
- Agent: AI agents with specific roles
- Task: Specific tasks or objectives
- Tool: Tools or functions used
- Input: Data inputs to the system
- Output: Data outputs from the system
- Human: Human users or stakeholders

Use these relationship types:
- CONSUMED_BY: Input→Agent
- PERFORMS: Agent→Task
- ASSIGNED_TO: Task→Agent
- USES: Agent→Tool
- REQUIRED_BY: Tool→Task
- SUBTASK_OF: Task→Task
- NEXT: Task→Task (sequence)
- PRODUCES: Task→Output
- DELIVERS_TO: Output→Human
- INTERVENES: Agent/Human→Task

Create a complete knowledge graph with:
1. Meaningful entities with descriptive names
2. Logical relationships between entities
3. A system name and summary
4. At least 3-5 entities for any non-trivial workflow

Focus on identifying the actual workflow, not framework details."""

    def __init__(self, model: str = "gpt-4o-2024-08-06"):
        """
        Initialize the extractor.

        Args:
            model: OpenAI model to use (must support structured outputs)
        """
        self.model = model
        self._client = None  # created lazily by the ``client`` property
        logger.info("OpenAI Structured Extractor initialized with model: %s", model)

    @property
    def client(self):
        """OpenAI client, built from ``OPENAI_API_KEY`` on first use."""
        if self._client is None:
            self._client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        return self._client

    @client.setter
    def client(self, value):
        # Keep ``extractor.client = ...`` working as it did with a plain attribute.
        self._client = value

    def extract_knowledge_graph(self, input_data: str, context_documents: Optional[List[Dict[str, Any]]] = None) -> KnowledgeGraph:
        """
        Extract knowledge graph from input data using OpenAI structured outputs.

        Args:
            input_data: The trace data to analyze
            context_documents: Optional context documents (unused in this simple version)

        Returns:
            KnowledgeGraph: Extracted knowledge graph

        Raises:
            ValueError: If the response carries no parsed knowledge graph.
            Exception: Any error raised by the API call or the conversion is
                logged and re-raised unchanged.
        """
        logger.info("Starting knowledge graph extraction for %d characters of input", len(input_data))

        user_prompt = f"Analyze this agent system trace and extract a knowledge graph:\n\n{input_data}"

        try:
            response = self.client.responses.parse(
                model=self.model,
                input=[
                    {"role": "system", "content": self._SYSTEM_PROMPT},
                    {"role": "user", "content": user_prompt},
                ],
                text_format=SimpleKnowledgeGraph,
            )

            # The parsed payload is absent when the model produced no usable
            # structured output; fail with a clear message instead of an
            # AttributeError deep inside the conversion.
            simple_kg = response.output_parsed
            if simple_kg is None:
                raise ValueError("OpenAI response did not contain a parsed knowledge graph")

            knowledge_graph = convert_simple_to_full_kg(simple_kg)

            logger.info(
                "Extraction complete: %d entities, %d relations",
                len(knowledge_graph.entities),
                len(knowledge_graph.relations),
            )
            return knowledge_graph

        except Exception as e:
            # logger.exception keeps the same message and adds the traceback.
            logger.exception("Extraction failed: %s", e)
            raise

    def process_text(self, input_data: str) -> Dict[str, Any]:
        """
        Process text and return structured response (for compatibility with extraction factory).

        Args:
            input_data: The trace data to analyze

        Returns:
            Dict with success status and kg_data; on failure it also carries
            an "error" message and an empty kg_data.
        """
        try:
            kg = self.extract_knowledge_graph(input_data)
            return {
                "success": True,
                "kg_data": kg.model_dump()
            }
        except Exception as e:
            # The failure has already been logged by extract_knowledge_graph.
            return {
                "success": False,
                "error": str(e),
                "kg_data": {}
            }
207
+
208
def extract_knowledge_graph_with_context(
    input_data: str,
    context_documents: Optional[List[Dict[str, Any]]] = None,
    model: str = "gpt-4o-2024-08-06",
) -> KnowledgeGraph:
    """
    Main entry point for knowledge graph extraction.

    Creates a fresh OpenAIStructuredExtractor for the requested model and
    delegates the extraction to it.

    Args:
        input_data: The trace data to analyze
        context_documents: Optional context documents
        model: OpenAI model to use

    Returns:
        KnowledgeGraph: Extracted knowledge graph
    """
    return OpenAIStructuredExtractor(model=model).extract_knowledge_graph(
        input_data, context_documents
    )
226
+
227
def extract_knowledge_graph(input_data: str) -> KnowledgeGraph:
    """
    Simple entry point without context (for backward compatibility).

    Forwards to extract_knowledge_graph_with_context using its defaults.

    Args:
        input_data: The trace data to analyze

    Returns:
        KnowledgeGraph: Extracted knowledge graph
    """
    graph = extract_knowledge_graph_with_context(input_data)
    return graph
238
+
239
+ # Factory class for integration
240
class OpenAIStructuredFactory:
    """Factory class for OpenAI structured extraction method.

    The underlying extractor is created on first use instead of in
    ``__init__``. An instance of this factory is built at module import time,
    so creating the extractor eagerly would make importing the module depend
    on the OpenAI configuration being present.
    """

    def __init__(self, model: str = "gpt-4o-2024-08-06"):
        """Remember the model; the extractor is built lazily."""
        self.model = model
        self._extractor = None

    @property
    def extractor(self) -> "OpenAIStructuredExtractor":
        """Extractor for the current model, created on first access."""
        if self._extractor is None:
            self._extractor = OpenAIStructuredExtractor(self.model)
        return self._extractor

    @extractor.setter
    def extractor(self, value) -> None:
        # Keep ``factory.extractor = ...`` working as with a plain attribute.
        self._extractor = value

    def set_model(self, model: str):
        """Set the model for this factory."""
        self.model = model
        # Drop the cached extractor so the next use builds one for the new model.
        self._extractor = None

    def process_text(self, input_data: str) -> Dict[str, Any]:
        """Process text using the extractor."""
        return self.extractor.process_text(input_data)


# Export factory instance
openai_structured_factory = OpenAIStructuredFactory()
258
+
259
if __name__ == "__main__":
    # Manual smoke test: run one extraction on a tiny trace and print the result.
    sample_trace = """
Agent: DataAnalyzer
Task: Analyze customer data and generate insights
Tool: pandas_analyzer

The DataAnalyzer agent processes customer data using pandas_analyzer tool
to generate business insights for the marketing team.
"""

    print("Testing OpenAI Structured Extractor...")
    try:
        graph = extract_knowledge_graph(sample_trace)
        entity_count = len(graph.entities)
        relation_count = len(graph.relations)
        print(f"✅ Success! Extracted {entity_count} entities and {relation_count} relations")
        print(f"System: {graph.system_name}")
        print(f"Summary: {graph.system_summary}")
    except Exception as exc:
        print(f"❌ Error: {exc}")
agentgraph/shared/method_registry.py CHANGED
@@ -35,6 +35,17 @@ AVAILABLE_METHODS = {
35
  "processing_type": "async_crew"
36
  },
37
 
 
 
 
 
 
 
 
 
 
 
 
38
  # Baseline methods using direct-based schema
39
  "original_method": {
40
  "name": "Original Method",
 
35
  "processing_type": "async_crew"
36
  },
37
 
38
+ "openai_structured": {
39
+ "name": "OpenAI Structured Outputs",
40
+ "description": "Simple OpenAI structured outputs extractor using Pydantic models",
41
+ "method_type": MethodType.PRODUCTION,
42
+ "schema_type": SchemaType.REFERENCE_BASED,
43
+ "module_path": "agentgraph.methods.production.openai_structured_extractor",
44
+ "class_name": "OpenAIStructuredFactory",
45
+ "supported_features": ["structured_outputs", "direct_extraction"],
46
+ "processing_type": "direct_call"
47
+ },
48
+
49
  # Baseline methods using direct-based schema
50
  "original_method": {
51
  "name": "Original Method",
frontend/src/components/shared/modals/SplitterSelectionModal.tsx CHANGED
@@ -290,7 +290,11 @@ export function SplitterSelectionModal({
290
  <div className="flex items-center gap-2">
291
  <Brain className="h-4 w-4 text-blue-500" />
292
  <p className="text-xs text-muted-foreground">
293
- <span className="font-medium text-foreground">Smart Chunking:</span> Balance context preservation with processing speed - defaults optimized for most traces.
 
 
 
 
294
  </p>
295
  </div>
296
  </div>
 
290
  <div className="flex items-center gap-2">
291
  <Brain className="h-4 w-4 text-blue-500" />
292
  <p className="text-xs text-muted-foreground">
293
+ <span className="font-medium text-foreground">
294
+ Smart Chunking:
295
+ </span>{" "}
296
+ Balance context preservation with processing speed -
297
+ defaults optimized for most traces.
298
  </p>
299
  </div>
300
  </div>
simple_test.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Simple test to verify OpenAI structured outputs functionality
4
+ """
5
+
6
+ import os
7
+ from dotenv import load_dotenv
8
+ from openai import OpenAI
9
+ from pydantic import BaseModel
10
+ from typing import List
11
+
12
# Load environment variables. No path is passed so that python-dotenv locates
# the .env file itself instead of relying on an absolute path that only
# exists on one developer's machine.
load_dotenv()
14
+
15
class SimpleEntity(BaseModel):
    # Minimal entity record for the structured-output smoke test.

    # Name of the entity.
    name: str
    # Category of the entity.
    type: str
18
+
19
class SimpleKG(BaseModel):
    # Response schema for the smoke test: a list of entities and a system name.

    # Entities found in the text.
    entities: List[SimpleEntity]
    # Name given to the system.
    system_name: str
22
+
23
def test_basic_openai():
    """Test basic OpenAI structured outputs.

    Returns:
        True when the API call succeeds and the parsed result is printed;
        False when the API key is missing, is still the placeholder value,
        or the call raises.
    """

    # Check if API key exists
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        print("❌ OPENAI_API_KEY not found in environment")
        return False

    if api_key == "your_openai_api_key_here":
        print("❌ Please set a real OpenAI API key in .env file")
        return False

    # Do not echo any part of the key: test output tends to end up in
    # terminals, CI logs and bug reports.
    print("✅ API key found")

    try:
        client = OpenAI(api_key=api_key)

        response = client.responses.parse(
            model="gpt-4o-2024-08-06",
            input=[
                {"role": "system", "content": "Extract entities from text."},
                {"role": "user", "content": "Alice the manager uses Excel tool to analyze data."}
            ],
            text_format=SimpleKG,
        )

        result = response.output_parsed
        print("✅ OpenAI API call successful!")
        print(f"System: {result.system_name}")
        print(f"Entities: {len(result.entities)}")
        for entity in result.entities:
            print(f" - {entity.type}: {entity.name}")

        return True

    except Exception as e:
        print(f"❌ OpenAI API call failed: {e}")
        return False
62
+
63
if __name__ == "__main__":
    # Script entry point: print a banner, run the check, report the outcome.
    for banner_line in ("🧪 Testing Basic OpenAI Structured Outputs", "=" * 50):
        print(banner_line)

    passed = test_basic_openai()

    verdict = (
        "\n🎉 Basic test passed! Ready to use OpenAI structured outputs."
        if passed
        else "\n💥 Basic test failed. Please check your OpenAI API key."
    )
    print(verdict)
test_simple_kg.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test with simplified KnowledgeGraph model
4
+ """
5
+
6
+ import os
7
+ import sys
8
+ from dotenv import load_dotenv
9
+ from openai import OpenAI
10
+ from pydantic import BaseModel, Field
11
+ from typing import List, Optional
12
+
13
# Load environment variables. No path is passed so that python-dotenv locates
# the .env file itself instead of relying on an absolute path that only
# exists on one developer's machine.
load_dotenv()
15
+
16
+ # Simplified models
17
class SimpleEntity(BaseModel):
    # Simplified entity record for the knowledge-graph test schema.

    # Identifier of the entity.
    id: str
    # Entity category: Agent, Task, Tool, Input, Output, Human.
    type: str
    # Name of the entity.
    name: str
    # Importance level: HIGH, MEDIUM, LOW.
    importance: str
22
+
23
class SimpleRelation(BaseModel):
    # Simplified relation record for the knowledge-graph test schema.

    # Identifier of the relation.
    id: str
    # Start of the relation.
    source: str
    # End of the relation.
    target: str
    # Relationship kind: PERFORMS, USES, etc.
    type: str
    # Importance label of the relation.
    importance: str
29
+
30
class SimpleKnowledgeGraph(BaseModel):
    # Top-level test schema; every field has a default (empty string / list).

    system_name: str = Field(default="", description="Name of the system")
    system_summary: str = Field(default="", description="Summary of the system")
    entities: List[SimpleEntity] = Field(default_factory=list)
    relations: List[SimpleRelation] = Field(default_factory=list)
35
+
36
def test_simple_kg():
    """Test with simplified KG model.

    Sends one sample trace through the structured-output API using the
    simplified schema and prints the extracted entities and relations.

    Returns:
        True when the extraction succeeds; False when anything raises,
        including construction of the OpenAI client.
    """

    test_input = """
Assistant: I'll help you analyze the customer data to find purchasing patterns.

Action: load_data
Action Input: {"dataset": "customer_purchases.csv"}
Observation: Data loaded successfully. Found 10,000 customer records.

Action: analyze_patterns
Action Input: {"columns": ["purchase_amount", "product_category", "customer_age"]}
Observation: Analysis complete. Found strong correlation between age and product preferences.

Final Answer: Based on the analysis, customers aged 25-35 prefer electronics.
"""

    system_prompt = """Extract a knowledge graph with these entity types:
- Agent: AI agents
- Task: Specific tasks
- Tool: Tools or functions
- Input: Data inputs
- Output: Data outputs
- Human: Human users

Use these relationship types:
- PERFORMS: Agent→Task
- USES: Agent→Tool
- PRODUCES: Task→Output

Create entities with IDs like agent_001, task_001, etc."""

    try:
        # Built inside the try block: the client constructor raises when no
        # API key is configured, and that should be reported as a failed test
        # rather than crash with a traceback.
        client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

        print("🧪 Testing Simplified Knowledge Graph Extraction")
        print("=" * 60)

        response = client.responses.parse(
            model="gpt-4o-2024-08-06",
            input=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Extract knowledge graph from: {test_input}"}
            ],
            text_format=SimpleKnowledgeGraph,
        )

        kg = response.output_parsed

        print("✅ Extraction successful!")
        print(f"📊 System: {kg.system_name}")
        print(f"📝 Summary: {kg.system_summary}")
        print(f"🔢 Entities: {len(kg.entities)}")
        print(f"🔗 Relations: {len(kg.relations)}")

        print("\n📋 Entities:")
        for entity in kg.entities:
            print(f" - {entity.id}: {entity.type} - {entity.name} ({entity.importance})")

        print("\n🔗 Relations:")
        for relation in kg.relations:
            print(f" - {relation.id}: {relation.source} → {relation.target} ({relation.type})")

        return True

    except Exception as e:
        print(f"❌ Test failed: {e}")
        return False
104
+
105
if __name__ == "__main__":
    # Script entry point: run the check and report the outcome.
    passed = test_simple_kg()
    print(
        "\n🎉 Simplified KG test passed!"
        if passed
        else "\n💥 Simplified KG test failed."
    )