# AgentGraph / agentgraph/shared/method_registry.py  (wu981526092, f0fc928)
"""
Method Registry for Knowledge Extraction Methods
This module provides a centralized registry for all available knowledge extraction methods
and their associated schemas. Each method is bound to a specific schema type.
"""
from enum import Enum
from typing import Any, Dict, List, Optional
class MethodType(Enum):
    """Classification of a knowledge extraction method.

    Every entry in the method registry carries one of these members under
    its ``"method_type"`` key.
    """

    # Methods registered as production extractors.
    PRODUCTION = "production"
    # Methods registered as baseline extractors.
    BASELINE = "baseline"
class SchemaType(Enum):
    """Kind of schema an extraction method is bound to.

    Every entry in the method registry carries one of these members under
    its ``"schema_type"`` key.
    """

    # Schema used by the registry's production entries.
    REFERENCE_BASED = "reference_based"
    # Schema used by the registry's baseline entries.
    DIRECT_BASED = "direct_based"
# Method Registry - maps method names to their implementations and schemas.
#
# Each entry is a metadata dict with the following keys:
#   name               - human-readable display name
#   description        - one-line description of the method
#   method_type        - MethodType member (PRODUCTION or BASELINE)
#   schema_type        - SchemaType member the method is bound to
#   module_path        - dotted path of the module holding the implementation
#   class_name         - name of the class/factory inside that module
#                        (presumably resolved by a loader elsewhere -- confirm)
#   supported_features - list of feature tags
#   processing_type    - "direct_call" or "async_crew"
#
# Keys must be unique: in a dict literal a repeated key silently replaces the
# earlier value, so "openai_structured" is defined exactly once below.
AVAILABLE_METHODS: Dict[str, Dict[str, Any]] = {
    # Production method: OpenAI Structured Outputs (simple, direct_call)
    "openai_structured": {
        "name": "OpenAI Structured Outputs",
        "description": "Simple OpenAI structured outputs extractor using Pydantic models",
        "method_type": MethodType.PRODUCTION,
        "schema_type": SchemaType.REFERENCE_BASED,
        "module_path": "agentgraph.methods.production.openai_structured_extractor",
        "class_name": "OpenAIStructuredFactory",
        "supported_features": ["structured_outputs", "direct_extraction"],
        "processing_type": "direct_call",
    },
    # Production method using reference-based schema
    "production": {
        "name": "Multi-Agent Knowledge Extractor",
        "description": "Production CrewAI-based multi-agent system with content reference resolution",
        "method_type": MethodType.PRODUCTION,
        "schema_type": SchemaType.REFERENCE_BASED,
        "module_path": "agentgraph.methods.production.multi_agent_knowledge_extractor",
        "class_name": "agent_monitoring_crew_factory",
        "supported_features": ["content_references", "failure_detection", "line_numbers"],
        "processing_type": "async_crew",
    },
    # Baseline methods using direct-based schema
    "original_method": {
        "name": "Original Method",
        "description": "Original baseline extraction method",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.original_method",
        "class_name": "OriginalKnowledgeExtractionMethod",
        "supported_features": ["direct_extraction"],
        "processing_type": "direct_call",
    },
    "clustering_method": {
        "name": "Clustering Method",
        "description": "Clustering-based extraction method",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.clustering_method",
        "class_name": "ClusteringKnowledgeExtractionMethod",
        "supported_features": ["direct_extraction", "clustering"],
        "processing_type": "direct_call",
    },
    "direct_llm_method": {
        "name": "Direct LLM Method",
        "description": "Direct LLM-based extraction method",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.direct_llm_method",
        "class_name": "DirectLLMKnowledgeExtractor",
        "supported_features": ["direct_extraction", "llm"],
        "processing_type": "direct_call",
    },
    "hybrid_method": {
        "name": "Hybrid Method",
        "description": "Hybrid extraction combining multiple approaches",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.hybrid_method",
        "class_name": "HybridKnowledgeExtractionMethod",
        "supported_features": ["direct_extraction", "hybrid"],
        "processing_type": "direct_call",
    },
    "pydantic_method": {
        "name": "Pydantic Method",
        "description": "Pydantic-based extraction method",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.pydantic_method",
        "class_name": "PydanticKnowledgeExtractor",
        "supported_features": ["direct_extraction", "pydantic"],
        "processing_type": "direct_call",
    },
    "unified_method": {
        "name": "Unified Method",
        "description": "Unified extraction method",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.unified_method",
        "class_name": "UnifiedKnowledgeExtractionMethod",
        "supported_features": ["direct_extraction", "unified"],
        "processing_type": "direct_call",
    },
    "openai_agent": {
        "name": "OpenAI Agent",
        "description": "OpenAI Agent with function tools and validation",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.openai_agent",
        "class_name": "OpenAIAgentKnowledgeExtractor",
        "supported_features": ["direct_extraction", "pipeline", "validation_improvement", "graph_enhancement"],
        "processing_type": "direct_call",
    },
    # NOTE(review): the next two entries point at the same module/class as
    # "pydantic_method" and differ only in their metadata -- confirm this is
    # intentional and not a copy-paste leftover.
    "sequential_pydantic": {
        "name": "Sequential Pydantic",
        "description": "Sequential Pydantic-based extraction method",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.pydantic_method",
        "class_name": "PydanticKnowledgeExtractor",
        "supported_features": ["direct_extraction", "pydantic", "sequential"],
        "processing_type": "direct_call",
    },
    "pydantic_hybrid_method": {
        "name": "Pydantic Hybrid Method",
        "description": "Hybrid Pydantic-based extraction method",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.pydantic_method",
        "class_name": "PydanticKnowledgeExtractor",
        "supported_features": ["direct_extraction", "pydantic", "hybrid"],
        "processing_type": "direct_call",
    },
    # rule_based_method removed due to import errors
}
# Default method configuration: registry key (in AVAILABLE_METHODS) of the
# method treated as the default.
# NOTE(review): presumably used by callers when no method is specified -- confirm.
DEFAULT_METHOD: str = "openai_structured"
def get_available_methods() -> Dict[str, Dict[str, Any]]:
    """Return an independent snapshot of every registered method.

    The registry is copied deeply, so callers that modify the returned
    mapping -- or any per-method metadata dict or feature list inside it --
    cannot alter ``AVAILABLE_METHODS`` itself. A shallow ``dict.copy()``
    would only protect the top-level mapping and still share the nested
    metadata dicts with the registry.

    Returns:
        A new dict mapping each method key to a copy of its metadata dict.
    """
    # Imported locally so the function brings its own dependency into scope.
    from copy import deepcopy

    return deepcopy(AVAILABLE_METHODS)
def get_method_info(method_name: str) -> Optional[Dict[str, Any]]:
    """Look up the metadata registered under ``method_name``.

    Returns:
        The registry's own metadata dict for that key (not a copy), or
        ``None`` when the key is not registered.
    """
    try:
        return AVAILABLE_METHODS[method_name]
    except KeyError:
        # Unknown method names are reported as None rather than raising.
        return None
def get_methods_by_type(method_type: MethodType) -> Dict[str, Dict[str, Any]]:
    """Select the registry entries whose ``"method_type"`` equals ``method_type``.

    Returns:
        A new dict, in registry order, mapping each matching method key to
        its metadata dict. Empty when nothing matches.
    """
    matching: Dict[str, Dict[str, Any]] = {}
    for key, metadata in AVAILABLE_METHODS.items():
        if metadata["method_type"] == method_type:
            matching[key] = metadata
    return matching
def get_methods_by_schema(schema_type: SchemaType) -> Dict[str, Dict[str, Any]]:
    """Select the registry entries whose ``"schema_type"`` equals ``schema_type``.

    Returns:
        A new dict, in registry order, mapping each matching method key to
        its metadata dict. Empty when nothing matches.
    """

    def uses_schema(entry):
        # entry is a (method key, metadata dict) pair from the registry.
        _, metadata = entry
        return metadata["schema_type"] == schema_type

    return dict(filter(uses_schema, AVAILABLE_METHODS.items()))
def get_schema_for_method(method_name: str) -> Optional[SchemaType]:
    """Report which schema type ``method_name`` is bound to.

    Returns:
        The entry's ``"schema_type"`` value, or ``None`` when
        ``get_method_info`` finds no (or an empty) entry for the name.
    """
    entry = get_method_info(method_name)
    if not entry:
        return None
    return entry["schema_type"]
def is_valid_method(method_name: str) -> bool:
    """Tell whether ``method_name`` is a key of the method registry."""
    registered_names = AVAILABLE_METHODS.keys()
    return method_name in registered_names
def get_method_names() -> List[str]:
    """Return every registered method key as a new list, in registry order."""
    # Iterating a dict yields its keys, so no explicit .keys() call is needed.
    return list(AVAILABLE_METHODS)
def get_production_methods() -> List[str]:
    """Return the keys of all entries typed ``MethodType.PRODUCTION``.

    Returns:
        A new list of method keys in registry order.
    """
    production_names: List[str] = []
    for key, metadata in AVAILABLE_METHODS.items():
        if metadata["method_type"] == MethodType.PRODUCTION:
            production_names.append(key)
    return production_names
def get_baseline_methods() -> List[str]:
    """Return the keys of all entries typed ``MethodType.BASELINE``.

    Returns:
        A new list of method keys in registry order.
    """
    wanted = MethodType.BASELINE
    return [
        key
        for key in AVAILABLE_METHODS
        if AVAILABLE_METHODS[key]["method_type"] == wanted
    ]
def get_method_display_name(method_name: str) -> str:
    """Return the human-readable name registered for ``method_name``.

    Returns:
        The entry's ``"name"`` value, or ``method_name`` itself when
        ``get_method_info`` finds no (or an empty) entry for it.
    """
    entry = get_method_info(method_name)
    if entry:
        return entry["name"]
    # Unknown methods are shown under the raw key that was passed in.
    return method_name
def get_method_description(method_name: str) -> str:
    """Return the description registered for ``method_name``.

    Returns:
        The entry's ``"description"`` value, or an empty string when
        ``get_method_info`` finds no (or an empty) entry for the name.
    """
    entry = get_method_info(method_name)
    if not entry:
        return ""
    return entry["description"]
def validate_method_schema_compatibility(method_name: str, expected_schema: SchemaType) -> bool:
    """Check that ``method_name`` is bound to ``expected_schema``.

    Returns:
        ``True`` when the schema reported by ``get_schema_for_method``
        equals ``expected_schema``; ``False`` when it differs or when no
        schema is reported for the method.
    """
    actual_schema = get_schema_for_method(method_name)
    if not actual_schema:
        # No schema reported: an unknown method never counts as compatible.
        return False
    return actual_schema == expected_schema