Spaces:

holistic-ai
/

AgentGraph

Running

File size: 9,199 Bytes

"""
Method Registry for Knowledge Extraction Methods

This module provides a centralized registry for all available knowledge extraction methods
and their associated schemas. Each method is bound to a specific schema type.
"""

from enum import Enum
from typing import Any, Dict, List, Optional


class MethodType(Enum):
    """Category assigned to each extraction method in the registry.

    Every entry of ``AVAILABLE_METHODS`` carries one of these members under
    its ``"method_type"`` key.
    """

    # Methods registered under the ``agentgraph.methods.production`` package.
    PRODUCTION = "production"
    # Methods registered under the ``agentgraph.methods.baseline`` package.
    BASELINE = "baseline"


class SchemaType(Enum):
    """Schema family an extraction method is bound to.

    Every entry of ``AVAILABLE_METHODS`` carries one of these members under
    its ``"schema_type"`` key.
    """

    # Used by the registry's production entries.
    REFERENCE_BASED = "reference_based"
    # Used by the registry's baseline entries.
    DIRECT_BASED = "direct_based"


# Method Registry - maps each method name to its metadata.
#
# Every entry has the same keys:
#   name               - human-readable display name
#   description        - one-line summary
#   method_type        - MethodType member (production vs. baseline)
#   schema_type        - SchemaType member the method is bound to
#   module_path        - dotted path of the module holding the implementation
#   class_name         - name of the class/factory inside that module
#   supported_features - list of feature tags
#   processing_type    - "direct_call" or "async_crew"
#
# Keys must be unique: a repeated key in a dict literal silently overwrites
# the earlier entry. Insertion order is preserved, and the list-returning
# helpers in this module report names in this order.
AVAILABLE_METHODS: Dict[str, Dict[str, Any]] = {
    # --- Production methods (reference-based schema) ---

    # OpenAI Structured Outputs (simple, direct_call)
    "openai_structured": {
        "name": "OpenAI Structured Outputs",
        "description": "Simple OpenAI structured outputs extractor using Pydantic models",
        "method_type": MethodType.PRODUCTION,
        "schema_type": SchemaType.REFERENCE_BASED,
        "module_path": "agentgraph.methods.production.openai_structured_extractor",
        "class_name": "OpenAIStructuredFactory",
        "supported_features": ["structured_outputs", "direct_extraction"],
        "processing_type": "direct_call",
    },

    # CrewAI-based multi-agent extractor (async_crew)
    "production": {
        "name": "Multi-Agent Knowledge Extractor",
        "description": "Production CrewAI-based multi-agent system with content reference resolution",
        "method_type": MethodType.PRODUCTION,
        "schema_type": SchemaType.REFERENCE_BASED,
        "module_path": "agentgraph.methods.production.multi_agent_knowledge_extractor",
        "class_name": "agent_monitoring_crew_factory",
        "supported_features": ["content_references", "failure_detection", "line_numbers"],
        "processing_type": "async_crew",
    },

    # --- Baseline methods (direct-based schema) ---

    "original_method": {
        "name": "Original Method",
        "description": "Original baseline extraction method",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.original_method",
        "class_name": "OriginalKnowledgeExtractionMethod",
        "supported_features": ["direct_extraction"],
        "processing_type": "direct_call",
    },

    "clustering_method": {
        "name": "Clustering Method",
        "description": "Clustering-based extraction method",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.clustering_method",
        "class_name": "ClusteringKnowledgeExtractionMethod",
        "supported_features": ["direct_extraction", "clustering"],
        "processing_type": "direct_call",
    },

    "direct_llm_method": {
        "name": "Direct LLM Method",
        "description": "Direct LLM-based extraction method",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.direct_llm_method",
        "class_name": "DirectLLMKnowledgeExtractor",
        "supported_features": ["direct_extraction", "llm"],
        "processing_type": "direct_call",
    },

    "hybrid_method": {
        "name": "Hybrid Method",
        "description": "Hybrid extraction combining multiple approaches",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.hybrid_method",
        "class_name": "HybridKnowledgeExtractionMethod",
        "supported_features": ["direct_extraction", "hybrid"],
        "processing_type": "direct_call",
    },

    "pydantic_method": {
        "name": "Pydantic Method",
        "description": "Pydantic-based extraction method",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.pydantic_method",
        "class_name": "PydanticKnowledgeExtractor",
        "supported_features": ["direct_extraction", "pydantic"],
        "processing_type": "direct_call",
    },

    "unified_method": {
        "name": "Unified Method",
        "description": "Unified extraction method",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.unified_method",
        "class_name": "UnifiedKnowledgeExtractionMethod",
        "supported_features": ["direct_extraction", "unified"],
        "processing_type": "direct_call",
    },

    "openai_agent": {
        "name": "OpenAI Agent",
        "description": "OpenAI Agent with function tools and validation",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.openai_agent",
        "class_name": "OpenAIAgentKnowledgeExtractor",
        "supported_features": ["direct_extraction", "pipeline", "validation_improvement", "graph_enhancement"],
        "processing_type": "direct_call",
    },

    # NOTE(review): the next two entries point at the same module and class as
    # "pydantic_method"; presumably the variant is selected elsewhere - confirm.
    "sequential_pydantic": {
        "name": "Sequential Pydantic",
        "description": "Sequential Pydantic-based extraction method",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.pydantic_method",
        "class_name": "PydanticKnowledgeExtractor",
        "supported_features": ["direct_extraction", "pydantic", "sequential"],
        "processing_type": "direct_call",
    },

    "pydantic_hybrid_method": {
        "name": "Pydantic Hybrid Method",
        "description": "Hybrid Pydantic-based extraction method",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.pydantic_method",
        "class_name": "PydanticKnowledgeExtractor",
        "supported_features": ["direct_extraction", "pydantic", "hybrid"],
        "processing_type": "direct_call",
    },

    # rule_based_method removed due to import errors
}


# Registry key of the default extraction method; it is one of the keys of
# AVAILABLE_METHODS.
DEFAULT_METHOD: str = "openai_structured"


def get_available_methods() -> Dict[str, Dict[str, Any]]:
    """Return a shallow copy of the method registry.

    The outer mapping is a new dict, so adding or removing top-level keys on
    the result does not affect ``AVAILABLE_METHODS``. The per-method metadata
    dicts are the registry's own objects, not copies.
    """
    return dict(AVAILABLE_METHODS)


def get_method_info(method_name: str) -> Optional[Dict[str, Any]]:
    """Look up the registry entry for ``method_name``.

    Returns:
        The metadata dict stored in ``AVAILABLE_METHODS`` (the registry's own
        object, not a copy), or ``None`` if the name is not registered.
    """
    try:
        return AVAILABLE_METHODS[method_name]
    except KeyError:
        return None


def get_methods_by_type(method_type: MethodType) -> Dict[str, Dict[str, Any]]:
    """Return the registry entries whose ``"method_type"`` equals ``method_type``.

    The result is a new dict in registry order; its values are the registry's
    own metadata dicts.
    """
    selected: Dict[str, Dict[str, Any]] = {}
    for key, entry in AVAILABLE_METHODS.items():
        if entry["method_type"] == method_type:
            selected[key] = entry
    return selected


def get_methods_by_schema(schema_type: SchemaType) -> Dict[str, Dict[str, Any]]:
    """Return the registry entries whose ``"schema_type"`` equals ``schema_type``.

    The result is a new dict in registry order; its values are the registry's
    own metadata dicts.
    """
    matching: Dict[str, Dict[str, Any]] = {}
    for key, entry in AVAILABLE_METHODS.items():
        if entry["schema_type"] != schema_type:
            continue
        matching[key] = entry
    return matching


def get_schema_for_method(method_name: str) -> Optional[SchemaType]:
    """Return the schema type registered for ``method_name``.

    Returns:
        The entry's ``"schema_type"`` value, or ``None`` when
        ``get_method_info`` finds no (non-empty) entry for the name.
    """
    entry = get_method_info(method_name)
    if not entry:
        return None
    return entry["schema_type"]


def is_valid_method(method_name: str) -> bool:
    """Return ``True`` when ``method_name`` is a key of ``AVAILABLE_METHODS``."""
    is_registered = method_name in AVAILABLE_METHODS
    return is_registered


def get_method_names() -> List[str]:
    """Return every registered method name as a new list, in registry order."""
    # Iterating a dict yields its keys in insertion order.
    return list(AVAILABLE_METHODS)


def get_production_methods() -> List[str]:
    """Return the names of all methods tagged ``MethodType.PRODUCTION``.

    Names are listed in registry order.
    """
    # The filtered dict preserves registry order; list() keeps only its keys.
    return list(get_methods_by_type(MethodType.PRODUCTION))


def get_baseline_methods() -> List[str]:
    """Return the names of all methods tagged ``MethodType.BASELINE``.

    Names are listed in registry order.
    """
    # The filtered dict preserves registry order; list() keeps only its keys.
    return list(get_methods_by_type(MethodType.BASELINE))


def get_method_display_name(method_name: str) -> str:
    """Return the display name registered for ``method_name``.

    Falls back to ``method_name`` itself when ``get_method_info`` finds no
    (non-empty) entry for it.
    """
    entry = get_method_info(method_name)
    if not entry:
        return method_name
    return entry["name"]


def get_method_description(method_name: str) -> str:
    """Return the description registered for ``method_name``.

    Returns an empty string when ``get_method_info`` finds no (non-empty)
    entry for it.
    """
    entry = get_method_info(method_name)
    if not entry:
        return ""
    return entry["description"]


def validate_method_schema_compatibility(method_name: str, expected_schema: SchemaType) -> bool:
    """Check whether ``method_name`` is registered with ``expected_schema``.

    Returns:
        ``True`` when the schema type found for the method equals
        ``expected_schema``; ``False`` when they differ or when
        ``get_schema_for_method`` returns nothing for the name.
    """
    actual_schema = get_schema_for_method(method_name)
    if not actual_schema:
        return False
    return actual_schema == expected_schema