Spaces:

holistic-ai
/

AgentGraph

Running

wu981526092 committed on Sep 5, 2025

Commit

7e807a3

1 Parent(s): 7bca5b5

Add OpenAI Structured Outputs extraction method

- Implement simple OpenAI structured outputs extractor using Pydantic models
- Register as new production method 'openai_structured' in method registry
- Support direct extraction without complex multi-agent workflow
- Generate more complex knowledge graphs with better NEXT relationships
- Include factory integration for seamless system integration
- Build frontend with updated method selection capability

Files changed (6) hide show

agentgraph/methods/production/__init__.py +3 -1
agentgraph/methods/production/openai_structured_extractor.py +277 -0
agentgraph/shared/method_registry.py +11 -0
frontend/src/components/shared/modals/SplitterSelectionModal.tsx +5 -1
simple_test.py +72 -0
test_simple_kg.py +110 -0

agentgraph/methods/production/__init__.py CHANGED Viewed

@@ -7,8 +7,10 @@ These methods use content references and line numbers for precise content locati
 from . import multi_agent_knowledge_extractor
 from . import pydantic_multi_agent_knowledge_extractor
 __all__ = [
     "multi_agent_knowledge_extractor",
-    "pydantic_multi_agent_knowledge_extractor",
 ]

 from . import multi_agent_knowledge_extractor
 from . import pydantic_multi_agent_knowledge_extractor
+from . import openai_structured_extractor
 __all__ = [
     "multi_agent_knowledge_extractor",
+    "pydantic_multi_agent_knowledge_extractor",
+    "openai_structured_extractor",
 ]

agentgraph/methods/production/openai_structured_extractor.py ADDED Viewed

	@@ -0,0 +1,277 @@

+#!/usr/bin/env python3
+"""
+OpenAI Structured Outputs Knowledge Extractor
+A simple, direct approach using OpenAI's structured outputs API to extract
+knowledge graphs in one step using Pydantic models.
+"""
+import os
+import logging
+from typing import Optional, List, Dict, Any
+import uuid
+from datetime import datetime
+from dotenv import load_dotenv
+from openai import OpenAI
+from pydantic import BaseModel
+# Import Pydantic models
+from agentgraph.shared.models.reference_based import KnowledgeGraph, Entity, Relation
+# Load environment variables from root directory
+load_dotenv('/Users/zekunwu/Desktop/agent_monitoring/.env')
+# Configure logging
+logger = logging.getLogger(__name__)
+# Simplified models for OpenAI structured outputs
+# SimpleEntity: one flat entity record; every field is a plain string.
+# It is turned into the full Entity model by convert_simple_to_full_kg().
+class SimpleEntity(BaseModel):
+    id: str  # Entity identifier, copied unchanged into Entity.id
+    type: str  # Agent, Task, Tool, Input, Output, Human
+    name: str  # Entity name, copied unchanged into Entity.name
+    importance: str  # HIGH, MEDIUM, LOW
+# SimpleRelation: one flat relation record; every field is a plain string.
+# It is turned into the full Relation model by convert_simple_to_full_kg().
+class SimpleRelation(BaseModel):
+    id: str  # Relation identifier, copied unchanged into Relation.id
+    source: str  # presumably the id of the source entity; not validated here
+    target: str  # presumably the id of the target entity; not validated here
+    type: str  # PERFORMS, USES, etc.
+    importance: str  # passed through normalize_importance() during conversion
+# SimpleKnowledgeGraph: top-level model passed as text_format to
+# client.responses.parse(); all four fields are required (no defaults).
+class SimpleKnowledgeGraph(BaseModel):
+    system_name: str  # copied into KnowledgeGraph.system_name
+    system_summary: str  # copied into KnowledgeGraph.system_summary
+    entities: List[SimpleEntity]  # converted one-to-one into Entity objects
+    relations: List[SimpleRelation]  # converted one-to-one into Relation objects
+def normalize_importance(importance: str) -> str:
+    """Normalize importance values to HIGH/MEDIUM/LOW."""
+    importance_upper = importance.upper()
+    # Map common variations to standard values
+    mapping = {
+        "CRITICAL": "HIGH",
+        "VERY HIGH": "HIGH",
+        "VERY LOW": "LOW",
+        "NORMAL": "MEDIUM",
+        "STANDARD": "MEDIUM"
+    }
+    return mapping.get(importance_upper, importance_upper)
+def convert_simple_to_full_kg(simple_kg: SimpleKnowledgeGraph) -> KnowledgeGraph:
+    """Convert simplified KG to full KnowledgeGraph model."""
+    # Convert entities
+    entities = []
+    for se in simple_kg.entities:
+        entity = Entity(
+            id=se.id,
+            type=se.type,
+            name=se.name,
+            importance=normalize_importance(se.importance),  # Normalize importance
+            raw_prompt="",  # Empty as per requirements
+            raw_prompt_ref=[]  # Empty for now
+        )
+        entities.append(entity)
+    # Convert relations
+    relations = []
+    for sr in simple_kg.relations:
+        relation = Relation(
+            id=sr.id,
+            source=sr.source,
+            target=sr.target,
+            type=sr.type,
+            importance=normalize_importance(sr.importance),  # Normalize importance
+            interaction_prompt="",  # Empty as per requirements
+            interaction_prompt_ref=[]  # Empty for now
+        )
+        relations.append(relation)
+    # Create full KnowledgeGraph
+    kg = KnowledgeGraph(
+        system_name=simple_kg.system_name,
+        system_summary=simple_kg.system_summary,
+        entities=entities,
+        relations=relations,
+        failures=None,  # Not generated by this simple method
+        optimizations=None  # Not generated by this simple method
+    )
+    return kg
+class OpenAIStructuredExtractor:
+    """
+    Simple knowledge graph extractor using OpenAI's structured outputs.
+    """
+    def __init__(self, model: str = "gpt-4o-2024-08-06"):
+        """
+        Initialize the extractor.
+        Args:
+            model: OpenAI model to use (must support structured outputs)
+        """
+        self.model = model
+        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+        logger.info(f"OpenAI Structured Extractor initialized with model: {model}")
+    def extract_knowledge_graph(self, input_data: str, context_documents: Optional[List[Dict[str, Any]]] = None) -> KnowledgeGraph:
+        """
+        Extract knowledge graph from input data using OpenAI structured outputs.
+        Args:
+            input_data: The trace data to analyze
+            context_documents: Optional context documents (unused in this simple version)
+        Returns:
+            KnowledgeGraph: Extracted knowledge graph
+        """
+        logger.info(f"Starting knowledge graph extraction for {len(input_data)} characters of input")
+        # Simple system prompt - much shorter than the complex ones
+        system_prompt = """You are an expert at analyzing agent system traces and extracting knowledge graphs.
+Extract a knowledge graph with these entity types:
+- Agent: AI agents with specific roles
+- Task: Specific tasks or objectives
+- Tool: Tools or functions used
+- Input: Data inputs to the system
+- Output: Data outputs from the system
+- Human: Human users or stakeholders
+Use these relationship types:
+- CONSUMED_BY: Input→Agent
+- PERFORMS: Agent→Task
+- ASSIGNED_TO: Task→Agent
+- USES: Agent→Tool
+- REQUIRED_BY: Tool→Task
+- SUBTASK_OF: Task→Task
+- NEXT: Task→Task (sequence)
+- PRODUCES: Task→Output
+- DELIVERS_TO: Output→Human
+- INTERVENES: Agent/Human→Task
+Create a complete knowledge graph with:
+1. Meaningful entities with descriptive names
+2. Logical relationships between entities
+3. A system name and summary
+4. At least 3-5 entities for any non-trivial workflow
+Focus on identifying the actual workflow, not framework details."""
+        user_prompt = f"Analyze this agent system trace and extract a knowledge graph:\n\n{input_data}"
+        try:
+            response = self.client.responses.parse(
+                model=self.model,
+                input=[
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": user_prompt}
+                ],
+                text_format=SimpleKnowledgeGraph,
+            )
+            # Get the parsed response and convert to full model
+            simple_kg = response.output_parsed
+            knowledge_graph = convert_simple_to_full_kg(simple_kg)
+            logger.info(f"Extraction complete: {len(knowledge_graph.entities)} entities, {len(knowledge_graph.relations)} relations")
+            return knowledge_graph
+        except Exception as e:
+            logger.error(f"Extraction failed: {e}")
+            raise
+    def process_text(self, input_data: str) -> Dict[str, Any]:
+        """
+        Process text and return structured response (for compatibility with extraction factory).
+        Args:
+            input_data: The trace data to analyze
+        Returns:
+            Dict with success status and kg_data
+        """
+        try:
+            kg = self.extract_knowledge_graph(input_data)
+            return {
+                "success": True,
+                "kg_data": kg.model_dump()
+            }
+        except Exception as e:
+            return {
+                "success": False,
+                "error": str(e),
+                "kg_data": {}
+            }
+def extract_knowledge_graph_with_context(
+    input_data: str,
+    context_documents: Optional[List[Dict[str, Any]]] = None,
+    model: str = "gpt-4o-2024-08-06"
+) -> KnowledgeGraph:
+    """
+    Main entry point for knowledge graph extraction.
+    Args:
+        input_data: The trace data to analyze
+        context_documents: Optional context documents
+        model: OpenAI model to use
+    Returns:
+        KnowledgeGraph: Extracted knowledge graph
+    """
+    extractor = OpenAIStructuredExtractor(model=model)
+    return extractor.extract_knowledge_graph(input_data, context_documents)
+def extract_knowledge_graph(input_data: str) -> KnowledgeGraph:
+    """
+    Simple entry point without context (for backward compatibility).
+    Args:
+        input_data: The trace data to analyze
+    Returns:
+        KnowledgeGraph: Extracted knowledge graph
+    """
+    return extract_knowledge_graph_with_context(input_data)
+# Factory class for integration
+class OpenAIStructuredFactory:
+    """Factory class for OpenAI structured extraction method."""
+    def __init__(self, model: str = "gpt-4o-2024-08-06"):
+        self.model = model
+        self.extractor = OpenAIStructuredExtractor(model)
+    def set_model(self, model: str):
+        """Set the model for this factory."""
+        self.model = model
+        self.extractor = OpenAIStructuredExtractor(model)
+    def process_text(self, input_data: str) -> Dict[str, Any]:
+        """Process text using the extractor."""
+        return self.extractor.process_text(input_data)
+# Export factory instance
+openai_structured_factory = OpenAIStructuredFactory()
+if __name__ == "__main__":
+    # Simple test
+    test_input = """
+    Agent: DataAnalyzer
+    Task: Analyze customer data and generate insights
+    Tool: pandas_analyzer
+    The DataAnalyzer agent processes customer data using pandas_analyzer tool
+    to generate business insights for the marketing team.
+    """
+    print("Testing OpenAI Structured Extractor...")
+    try:
+        kg = extract_knowledge_graph(test_input)
+        print(f"✅ Success! Extracted {len(kg.entities)} entities and {len(kg.relations)} relations")
+        print(f"System: {kg.system_name}")
+        print(f"Summary: {kg.system_summary}")
+    except Exception as e:
+        print(f"❌ Error: {e}")

agentgraph/shared/method_registry.py CHANGED Viewed

@@ -35,6 +35,17 @@ AVAILABLE_METHODS = {
         "processing_type": "async_crew"
     },
     # Baseline methods using direct-based schema
     "original_method": {
         "name": "Original Method",

         "processing_type": "async_crew"
     },
+    "openai_structured": {
+        "name": "OpenAI Structured Outputs",
+        "description": "Simple OpenAI structured outputs extractor using Pydantic models",
+        "method_type": MethodType.PRODUCTION,
+        "schema_type": SchemaType.REFERENCE_BASED,
+        "module_path": "agentgraph.methods.production.openai_structured_extractor",
+        "class_name": "OpenAIStructuredFactory",
+        "supported_features": ["structured_outputs", "direct_extraction"],
+        "processing_type": "direct_call"
+    },
     # Baseline methods using direct-based schema
     "original_method": {
         "name": "Original Method",

frontend/src/components/shared/modals/SplitterSelectionModal.tsx CHANGED Viewed

@@ -290,7 +290,11 @@ export function SplitterSelectionModal({
                 <div className="flex items-center gap-2">
                   <Brain className="h-4 w-4 text-blue-500" />
                   <p className="text-xs text-muted-foreground">
-                    <span className="font-medium text-foreground">Smart Chunking:</span> Balance context preservation with processing speed - defaults optimized for most traces.
                   </p>
                 </div>
               </div>

                 <div className="flex items-center gap-2">
                   <Brain className="h-4 w-4 text-blue-500" />
                   <p className="text-xs text-muted-foreground">
+                    <span className="font-medium text-foreground">
+                      Smart Chunking:
+                    </span>{" "}
+                    Balance context preservation with processing speed -
+                    defaults optimized for most traces.
                   </p>
                 </div>
               </div>

simple_test.py ADDED Viewed

	@@ -0,0 +1,72 @@

+#!/usr/bin/env python3
+"""
+Simple test to verify OpenAI structured outputs functionality
+"""
+import os
+from dotenv import load_dotenv
+from openai import OpenAI
+from pydantic import BaseModel
+from typing import List
+# Load environment variables
+load_dotenv('/Users/zekunwu/Desktop/agent_monitoring/.env')
+# Minimal entity record for this smoke test: just a name and a type label.
+class SimpleEntity(BaseModel):
+    name: str  # printed after the type by test_basic_openai()
+    type: str  # free-form string; no fixed set of values is enforced here
+# Top-level model passed as text_format to client.responses.parse().
+class SimpleKG(BaseModel):
+    entities: List[SimpleEntity]  # entities extracted from the sample sentence
+    system_name: str  # printed as "System: ..." by test_basic_openai()
+def test_basic_openai():
+    """Test basic OpenAI structured outputs"""
+    # Check if API key exists
+    api_key = os.getenv("OPENAI_API_KEY")
+    if not api_key:
+        print("❌ OPENAI_API_KEY not found in environment")
+        return False
+    if api_key == "your_openai_api_key_here":
+        print("❌ Please set a real OpenAI API key in .env file")
+        return False
+    print(f"✅ API key found: {api_key[:10]}...")
+    try:
+        client = OpenAI(api_key=api_key)
+        response = client.responses.parse(
+            model="gpt-4o-2024-08-06",
+            input=[
+                {"role": "system", "content": "Extract entities from text."},
+                {"role": "user", "content": "Alice the manager uses Excel tool to analyze data."}
+            ],
+            text_format=SimpleKG,
+        )
+        result = response.output_parsed
+        print(f"✅ OpenAI API call successful!")
+        print(f"System: {result.system_name}")
+        print(f"Entities: {len(result.entities)}")
+        for entity in result.entities:
+            print(f"  - {entity.type}: {entity.name}")
+        return True
+    except Exception as e:
+        print(f"❌ OpenAI API call failed: {e}")
+        return False
+if __name__ == "__main__":
+    print("🧪 Testing Basic OpenAI Structured Outputs")
+    print("=" * 50)
+    success = test_basic_openai()
+    if success:
+        print("\n🎉 Basic test passed! Ready to use OpenAI structured outputs.")
+    else:
+        print("\n💥 Basic test failed. Please check your OpenAI API key.")

test_simple_kg.py ADDED Viewed

	@@ -0,0 +1,110 @@

+#!/usr/bin/env python3
+"""
+Test with simplified KnowledgeGraph model
+"""
+import os
+import sys
+from dotenv import load_dotenv
+from openai import OpenAI
+from pydantic import BaseModel, Field
+from typing import List, Optional
+# Load environment variables
+load_dotenv('/Users/zekunwu/Desktop/agent_monitoring/.env')
+# Simplified models
+# SimpleEntity: flat entity record with the same four string fields as the
+# SimpleEntity model in openai_structured_extractor.py.
+class SimpleEntity(BaseModel):
+    id: str  # the system prompt below asks for IDs like agent_001, task_001
+    type: str  # Agent, Task, Tool, Input, Output, Human
+    name: str  # printed after the type by test_simple_kg()
+    importance: str  # HIGH, MEDIUM, LOW
+# SimpleRelation: flat relation record; test_simple_kg() prints it as
+# "id: source → target (type)".
+class SimpleRelation(BaseModel):
+    id: str  # relation identifier
+    source: str  # presumably the id of the source entity; not validated here
+    target: str  # presumably the id of the target entity; not validated here
+    type: str  # PERFORMS, USES, etc.
+    importance: str  # not printed by this test
+# Top-level model passed as text_format to client.responses.parse().
+# Unlike the model of the same name in openai_structured_extractor.py,
+# every field here has a default value.
+# NOTE(review): OpenAI structured outputs generally expect all fields to be
+# required - confirm these defaults are accepted by the API.
+class SimpleKnowledgeGraph(BaseModel):
+    system_name: str = Field("", description="Name of the system")
+    system_summary: str = Field("", description="Summary of the system")
+    entities: List[SimpleEntity] = Field(default_factory=list)
+    relations: List[SimpleRelation] = Field(default_factory=list)
+def test_simple_kg():
+    """Test with simplified KG model"""
+    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+    test_input = """
+    Assistant: I'll help you analyze the customer data to find purchasing patterns.
+    Action: load_data
+    Action Input: {"dataset": "customer_purchases.csv"}
+    Observation: Data loaded successfully. Found 10,000 customer records.
+    Action: analyze_patterns
+    Action Input: {"columns": ["purchase_amount", "product_category", "customer_age"]}
+    Observation: Analysis complete. Found strong correlation between age and product preferences.
+    Final Answer: Based on the analysis, customers aged 25-35 prefer electronics.
+    """
+    system_prompt = """Extract a knowledge graph with these entity types:
+- Agent: AI agents
+- Task: Specific tasks
+- Tool: Tools or functions
+- Input: Data inputs
+- Output: Data outputs
+- Human: Human users
+Use these relationship types:
+- PERFORMS: Agent→Task
+- USES: Agent→Tool
+- PRODUCES: Task→Output
+Create entities with IDs like agent_001, task_001, etc."""
+    try:
+        print("🧪 Testing Simplified Knowledge Graph Extraction")
+        print("=" * 60)
+        response = client.responses.parse(
+            model="gpt-4o-2024-08-06",
+            input=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": f"Extract knowledge graph from: {test_input}"}
+            ],
+            text_format=SimpleKnowledgeGraph,
+        )
+        kg = response.output_parsed
+        print(f"✅ Extraction successful!")
+        print(f"📊 System: {kg.system_name}")
+        print(f"📝 Summary: {kg.system_summary}")
+        print(f"🔢 Entities: {len(kg.entities)}")
+        print(f"🔗 Relations: {len(kg.relations)}")
+        print("\n📋 Entities:")
+        for entity in kg.entities:
+            print(f"  - {entity.id}: {entity.type} - {entity.name} ({entity.importance})")
+        print("\n🔗 Relations:")
+        for relation in kg.relations:
+            print(f"  - {relation.id}: {relation.source} → {relation.target} ({relation.type})")
+        return True
+    except Exception as e:
+        print(f"❌ Test failed: {e}")
+        return False
+if __name__ == "__main__":
+    success = test_simple_kg()
+    if success:
+        print("\n🎉 Simplified KG test passed!")
+    else:
+        print("\n💥 Simplified KG test failed.")