Spaces:

holistic-ai
/

AgentGraph

Running

AgentGraph / agentgraph /shared /method_registry.py

f0fc928 5 months ago

9.2 kB

	"""
	Method Registry for Knowledge Extraction Methods

	This module provides a centralized registry for all available knowledge extraction methods
	and their associated schemas. Each method is bound to a specific schema type.
	"""

	from enum import Enum
	from typing import Any, Dict, List, Optional


	class MethodType(Enum):
	    """Category of an extraction method: production or baseline."""

	    # Member values are the lowercase string form of each category.
	    PRODUCTION = "production"
	    BASELINE = "baseline"


	class SchemaType(Enum):
	    """Kind of schema an extraction method is bound to."""

	    # Member values are the lowercase string form of each schema kind.
	    REFERENCE_BASED = "reference_based"
	    DIRECT_BASED = "direct_based"


	# Method Registry - maps method names to their implementations and schemas.
	#
	# Every entry carries the same keys:
	#   name               - human-readable display name
	#   description        - one-line summary of the method
	#   method_type        - MethodType member (production or baseline)
	#   schema_type        - SchemaType member the method is bound to
	#   module_path        - dotted path of the module holding the implementation
	#   class_name         - name of the implementation inside that module
	#   supported_features - list of feature tags
	#   processing_type    - how the method is invoked ("direct_call" or "async_crew")
	#
	# Each key must appear only once: in a dict literal a repeated key silently
	# overwrites the earlier entry instead of raising an error.
	AVAILABLE_METHODS: Dict[str, Dict[str, Any]] = {
	    # Production method: OpenAI Structured Outputs (simple, direct_call)
	    "openai_structured": {
	        "name": "OpenAI Structured Outputs",
	        "description": "Simple OpenAI structured outputs extractor using Pydantic models",
	        "method_type": MethodType.PRODUCTION,
	        "schema_type": SchemaType.REFERENCE_BASED,
	        "module_path": "agentgraph.methods.production.openai_structured_extractor",
	        "class_name": "OpenAIStructuredFactory",
	        "supported_features": ["structured_outputs", "direct_extraction"],
	        "processing_type": "direct_call",
	    },

	    # Production method using reference-based schema
	    "production": {
	        "name": "Multi-Agent Knowledge Extractor",
	        "description": "Production CrewAI-based multi-agent system with content reference resolution",
	        "method_type": MethodType.PRODUCTION,
	        "schema_type": SchemaType.REFERENCE_BASED,
	        "module_path": "agentgraph.methods.production.multi_agent_knowledge_extractor",
	        "class_name": "agent_monitoring_crew_factory",
	        "supported_features": ["content_references", "failure_detection", "line_numbers"],
	        "processing_type": "async_crew",
	    },

	    # Baseline methods using direct-based schema
	    "original_method": {
	        "name": "Original Method",
	        "description": "Original baseline extraction method",
	        "method_type": MethodType.BASELINE,
	        "schema_type": SchemaType.DIRECT_BASED,
	        "module_path": "agentgraph.methods.baseline.original_method",
	        "class_name": "OriginalKnowledgeExtractionMethod",
	        "supported_features": ["direct_extraction"],
	        "processing_type": "direct_call",
	    },

	    "clustering_method": {
	        "name": "Clustering Method",
	        "description": "Clustering-based extraction method",
	        "method_type": MethodType.BASELINE,
	        "schema_type": SchemaType.DIRECT_BASED,
	        "module_path": "agentgraph.methods.baseline.clustering_method",
	        "class_name": "ClusteringKnowledgeExtractionMethod",
	        "supported_features": ["direct_extraction", "clustering"],
	        "processing_type": "direct_call",
	    },

	    "direct_llm_method": {
	        "name": "Direct LLM Method",
	        "description": "Direct LLM-based extraction method",
	        "method_type": MethodType.BASELINE,
	        "schema_type": SchemaType.DIRECT_BASED,
	        "module_path": "agentgraph.methods.baseline.direct_llm_method",
	        "class_name": "DirectLLMKnowledgeExtractor",
	        "supported_features": ["direct_extraction", "llm"],
	        "processing_type": "direct_call",
	    },

	    "hybrid_method": {
	        "name": "Hybrid Method",
	        "description": "Hybrid extraction combining multiple approaches",
	        "method_type": MethodType.BASELINE,
	        "schema_type": SchemaType.DIRECT_BASED,
	        "module_path": "agentgraph.methods.baseline.hybrid_method",
	        "class_name": "HybridKnowledgeExtractionMethod",
	        "supported_features": ["direct_extraction", "hybrid"],
	        "processing_type": "direct_call",
	    },

	    "pydantic_method": {
	        "name": "Pydantic Method",
	        "description": "Pydantic-based extraction method",
	        "method_type": MethodType.BASELINE,
	        "schema_type": SchemaType.DIRECT_BASED,
	        "module_path": "agentgraph.methods.baseline.pydantic_method",
	        "class_name": "PydanticKnowledgeExtractor",
	        "supported_features": ["direct_extraction", "pydantic"],
	        "processing_type": "direct_call",
	    },

	    "unified_method": {
	        "name": "Unified Method",
	        "description": "Unified extraction method",
	        "method_type": MethodType.BASELINE,
	        "schema_type": SchemaType.DIRECT_BASED,
	        "module_path": "agentgraph.methods.baseline.unified_method",
	        "class_name": "UnifiedKnowledgeExtractionMethod",
	        "supported_features": ["direct_extraction", "unified"],
	        "processing_type": "direct_call",
	    },

	    "openai_agent": {
	        "name": "OpenAI Agent",
	        "description": "OpenAI Agent with function tools and validation",
	        "method_type": MethodType.BASELINE,
	        "schema_type": SchemaType.DIRECT_BASED,
	        "module_path": "agentgraph.methods.baseline.openai_agent",
	        "class_name": "OpenAIAgentKnowledgeExtractor",
	        "supported_features": ["direct_extraction", "pipeline", "validation_improvement", "graph_enhancement"],
	        "processing_type": "direct_call",
	    },

	    # NOTE(review): the two entries below reuse the module_path and class_name
	    # of "pydantic_method"; only the display text and feature tags differ.
	    # Confirm that sharing one implementation is intentional.
	    "sequential_pydantic": {
	        "name": "Sequential Pydantic",
	        "description": "Sequential Pydantic-based extraction method",
	        "method_type": MethodType.BASELINE,
	        "schema_type": SchemaType.DIRECT_BASED,
	        "module_path": "agentgraph.methods.baseline.pydantic_method",
	        "class_name": "PydanticKnowledgeExtractor",
	        "supported_features": ["direct_extraction", "pydantic", "sequential"],
	        "processing_type": "direct_call",
	    },

	    "pydantic_hybrid_method": {
	        "name": "Pydantic Hybrid Method",
	        "description": "Hybrid Pydantic-based extraction method",
	        "method_type": MethodType.BASELINE,
	        "schema_type": SchemaType.DIRECT_BASED,
	        "module_path": "agentgraph.methods.baseline.pydantic_method",
	        "class_name": "PydanticKnowledgeExtractor",
	        "supported_features": ["direct_extraction", "pydantic", "hybrid"],
	        "processing_type": "direct_call",
	    },

	    # rule_based_method removed due to import errors
	}


	# Default method configuration: the registry key to use when no method is
	# chosen explicitly. "openai_structured" is a key of AVAILABLE_METHODS; the
	# code that consumes this default lives outside this module.
	DEFAULT_METHOD: str = "openai_structured"


	def get_available_methods() -> Dict[str, Dict[str, Any]]:
	    """Return every registered method together with its metadata.

	    Returns:
	        A new dict keyed by method name. The copy is shallow: the outer
	        mapping is fresh on each call, but the per-method metadata dicts are
	        the same objects stored in the registry.
	    """
	    registry_snapshot = dict(AVAILABLE_METHODS)
	    return registry_snapshot


	def get_method_info(method_name: str) -> Optional[Dict[str, Any]]:
	    """Look up the metadata registered under *method_name*.

	    Args:
	        method_name: Registry key of the method.

	    Returns:
	        The metadata dict stored in the registry (not a copy), or ``None``
	        when the name is not registered.
	    """
	    try:
	        return AVAILABLE_METHODS[method_name]
	    except KeyError:
	        return None


	def get_methods_by_type(method_type: MethodType) -> Dict[str, Dict[str, Any]]:
	    """Select the registry entries whose ``method_type`` equals *method_type*.

	    Args:
	        method_type: The method category to keep.

	    Returns:
	        A new dict of matching entries, in registry order. The values are
	        the registry's own metadata dicts.
	    """
	    selected: Dict[str, Dict[str, Any]] = {}
	    for key, metadata in AVAILABLE_METHODS.items():
	        if metadata["method_type"] == method_type:
	            selected[key] = metadata
	    return selected


	def get_methods_by_schema(schema_type: SchemaType) -> Dict[str, Dict[str, Any]]:
	    """Select the registry entries whose ``schema_type`` equals *schema_type*.

	    Args:
	        schema_type: The schema kind to keep.

	    Returns:
	        A new dict of matching entries, in registry order. The values are
	        the registry's own metadata dicts.
	    """
	    selected: Dict[str, Dict[str, Any]] = {}
	    for key, metadata in AVAILABLE_METHODS.items():
	        if metadata["schema_type"] == schema_type:
	            selected[key] = metadata
	    return selected


	def get_schema_for_method(method_name: str) -> Optional[SchemaType]:
	    """Return the schema type bound to *method_name*.

	    Args:
	        method_name: Registry key of the method.

	    Returns:
	        The entry's ``schema_type`` value, or ``None`` when the lookup via
	        ``get_method_info`` yields nothing.
	    """
	    info = get_method_info(method_name)
	    if not info:
	        return None
	    return info["schema_type"]


	def is_valid_method(method_name: str) -> bool:
	    """Report whether *method_name* is a key of the method registry."""
	    is_registered = method_name in AVAILABLE_METHODS
	    return is_registered


	def get_method_names() -> List[str]:
	    """Return the registry keys as a new list, in registry order."""
	    # Iterating a dict yields its keys, so no explicit .keys() call is needed.
	    return list(AVAILABLE_METHODS)


	def get_production_methods() -> List[str]:
	    """Return the names of all methods typed ``MethodType.PRODUCTION``.

	    Returns:
	        Registry keys of the production methods, in registry order.
	    """
	    # get_methods_by_type applies the same method_type filter; listing the
	    # resulting dict yields just its keys.
	    production_entries = get_methods_by_type(MethodType.PRODUCTION)
	    return list(production_entries)


	def get_baseline_methods() -> List[str]:
	    """Return the names of all methods typed ``MethodType.BASELINE``.

	    Returns:
	        Registry keys of the baseline methods, in registry order.
	    """
	    # get_methods_by_type applies the same method_type filter; listing the
	    # resulting dict yields just its keys.
	    baseline_entries = get_methods_by_type(MethodType.BASELINE)
	    return list(baseline_entries)


	def get_method_display_name(method_name: str) -> str:
	    """Return the human-readable name of a method.

	    Args:
	        method_name: Registry key of the method.

	    Returns:
	        The entry's ``name`` value, or *method_name* itself when the lookup
	        via ``get_method_info`` yields nothing.
	    """
	    info = get_method_info(method_name)
	    if info:
	        return info["name"]
	    return method_name


	def get_method_description(method_name: str) -> str:
	    """Return the description text of a method.

	    Args:
	        method_name: Registry key of the method.

	    Returns:
	        The entry's ``description`` value, or an empty string when the
	        lookup via ``get_method_info`` yields nothing.
	    """
	    info = get_method_info(method_name)
	    if info:
	        return info["description"]
	    return ""


	def validate_method_schema_compatibility(method_name: str, expected_schema: SchemaType) -> bool:
	    """Check that a method is bound to the expected schema type.

	    Args:
	        method_name: Registry key of the method.
	        expected_schema: Schema type the caller requires.

	    Returns:
	        ``True`` when the schema returned by ``get_schema_for_method`` equals
	        *expected_schema*; ``False`` when it differs or when no schema is
	        found for the method.
	    """
	    actual_schema = get_schema_for_method(method_name)
	    if not actual_schema:
	        # No schema found for this method name: never compatible.
	        return False
	    return actual_schema == expected_schema