""" Method Registry for Knowledge Extraction Methods This module provides a centralized registry for all available knowledge extraction methods and their associated schemas. Each method is bound to a specific schema type. """ from enum import Enum from typing import Any, Dict, List, Optional class MethodType(Enum): """Types of extraction methods""" PRODUCTION = "production" BASELINE = "baseline" class SchemaType(Enum): """Types of schemas used by methods""" REFERENCE_BASED = "reference_based" DIRECT_BASED = "direct_based" # Method Registry - maps method names to their implementations and schemas AVAILABLE_METHODS = { # Production method: OpenAI Structured Outputs (simple, direct_call) "openai_structured": { "name": "OpenAI Structured Outputs", "description": "Simple OpenAI structured outputs extractor using Pydantic models", "method_type": MethodType.PRODUCTION, "schema_type": SchemaType.REFERENCE_BASED, "module_path": "agentgraph.methods.production.openai_structured_extractor", "class_name": "OpenAIStructuredFactory", "supported_features": ["structured_outputs", "direct_extraction"], "processing_type": "direct_call" }, # Production method using reference-based schema "production": { "name": "Multi-Agent Knowledge Extractor", "description": "Production CrewAI-based multi-agent system with content reference resolution", "method_type": MethodType.PRODUCTION, "schema_type": SchemaType.REFERENCE_BASED, "module_path": "agentgraph.methods.production.multi_agent_knowledge_extractor", "class_name": "agent_monitoring_crew_factory", "supported_features": ["content_references", "failure_detection", "line_numbers"], "processing_type": "async_crew" }, "openai_structured": { "name": "OpenAI Structured Outputs", "description": "Simple OpenAI structured outputs extractor using Pydantic models", "method_type": MethodType.PRODUCTION, "schema_type": SchemaType.REFERENCE_BASED, "module_path": "agentgraph.methods.production.openai_structured_extractor", "class_name": "OpenAIStructuredFactory", "supported_features": ["structured_outputs", "direct_extraction"], "processing_type": "direct_call" }, # Baseline methods using direct-based schema "original_method": { "name": "Original Method", "description": "Original baseline extraction method", "method_type": MethodType.BASELINE, "schema_type": SchemaType.DIRECT_BASED, "module_path": "agentgraph.methods.baseline.original_method", "class_name": "OriginalKnowledgeExtractionMethod", "supported_features": ["direct_extraction"], "processing_type": "direct_call" }, "clustering_method": { "name": "Clustering Method", "description": "Clustering-based extraction method", "method_type": MethodType.BASELINE, "schema_type": SchemaType.DIRECT_BASED, "module_path": "agentgraph.methods.baseline.clustering_method", "class_name": "ClusteringKnowledgeExtractionMethod", "supported_features": ["direct_extraction", "clustering"], "processing_type": "direct_call" }, "direct_llm_method": { "name": "Direct LLM Method", "description": "Direct LLM-based extraction method", "method_type": MethodType.BASELINE, "schema_type": SchemaType.DIRECT_BASED, "module_path": "agentgraph.methods.baseline.direct_llm_method", "class_name": "DirectLLMKnowledgeExtractor", "supported_features": ["direct_extraction", "llm"], "processing_type": "direct_call" }, "hybrid_method": { "name": "Hybrid Method", "description": "Hybrid extraction combining multiple approaches", "method_type": MethodType.BASELINE, "schema_type": SchemaType.DIRECT_BASED, "module_path": "agentgraph.methods.baseline.hybrid_method", "class_name": "HybridKnowledgeExtractionMethod", "supported_features": ["direct_extraction", "hybrid"], "processing_type": "direct_call" }, "pydantic_method": { "name": "Pydantic Method", "description": "Pydantic-based extraction method", "method_type": MethodType.BASELINE, "schema_type": SchemaType.DIRECT_BASED, "module_path": "agentgraph.methods.baseline.pydantic_method", "class_name": "PydanticKnowledgeExtractor", "supported_features": ["direct_extraction", "pydantic"], "processing_type": "direct_call" }, "unified_method": { "name": "Unified Method", "description": "Unified extraction method", "method_type": MethodType.BASELINE, "schema_type": SchemaType.DIRECT_BASED, "module_path": "agentgraph.methods.baseline.unified_method", "class_name": "UnifiedKnowledgeExtractionMethod", "supported_features": ["direct_extraction", "unified"], "processing_type": "direct_call" }, "openai_agent": { "name": "OpenAI Agent", "description": "OpenAI Agent with function tools and validation", "method_type": MethodType.BASELINE, "schema_type": SchemaType.DIRECT_BASED, "module_path": "agentgraph.methods.baseline.openai_agent", "class_name": "OpenAIAgentKnowledgeExtractor", "supported_features": ["direct_extraction", "pipeline", "validation_improvement", "graph_enhancement"], "processing_type": "direct_call" }, "sequential_pydantic": { "name": "Sequential Pydantic", "description": "Sequential Pydantic-based extraction method", "method_type": MethodType.BASELINE, "schema_type": SchemaType.DIRECT_BASED, "module_path": "agentgraph.methods.baseline.pydantic_method", "class_name": "PydanticKnowledgeExtractor", "supported_features": ["direct_extraction", "pydantic", "sequential"], "processing_type": "direct_call" }, "pydantic_hybrid_method": { "name": "Pydantic Hybrid Method", "description": "Hybrid Pydantic-based extraction method", "method_type": MethodType.BASELINE, "schema_type": SchemaType.DIRECT_BASED, "module_path": "agentgraph.methods.baseline.pydantic_method", "class_name": "PydanticKnowledgeExtractor", "supported_features": ["direct_extraction", "pydantic", "hybrid"], "processing_type": "direct_call" }, # rule_based_method removed due to import errors } # Default method configuration DEFAULT_METHOD = "openai_structured" def get_available_methods() -> Dict[str, Dict[str, Any]]: """Get all available methods with their metadata""" return AVAILABLE_METHODS.copy() def get_method_info(method_name: str) -> Optional[Dict[str, Any]]: """Get information about a specific method""" return AVAILABLE_METHODS.get(method_name) def get_methods_by_type(method_type: MethodType) -> Dict[str, Dict[str, Any]]: """Get methods filtered by type""" return { name: info for name, info in AVAILABLE_METHODS.items() if info["method_type"] == method_type } def get_methods_by_schema(schema_type: SchemaType) -> Dict[str, Dict[str, Any]]: """Get methods filtered by schema type""" return { name: info for name, info in AVAILABLE_METHODS.items() if info["schema_type"] == schema_type } def get_schema_for_method(method_name: str) -> Optional[SchemaType]: """Get the schema type for a specific method""" method_info = get_method_info(method_name) return method_info["schema_type"] if method_info else None def is_valid_method(method_name: str) -> bool: """Check if a method name is valid""" return method_name in AVAILABLE_METHODS def get_method_names() -> List[str]: """Get list of all method names""" return list(AVAILABLE_METHODS.keys()) def get_production_methods() -> List[str]: """Get list of production method names""" return [ name for name, info in AVAILABLE_METHODS.items() if info["method_type"] == MethodType.PRODUCTION ] def get_baseline_methods() -> List[str]: """Get list of baseline method names""" return [ name for name, info in AVAILABLE_METHODS.items() if info["method_type"] == MethodType.BASELINE ] def get_method_display_name(method_name: str) -> str: """Get display name for a method""" method_info = get_method_info(method_name) return method_info["name"] if method_info else method_name def get_method_description(method_name: str) -> str: """Get description for a method""" method_info = get_method_info(method_name) return method_info["description"] if method_info else "" def validate_method_schema_compatibility(method_name: str, expected_schema: SchemaType) -> bool: """Validate that a method uses the expected schema type""" method_schema = get_schema_for_method(method_name) return method_schema == expected_schema if method_schema else False