Spaces:
Running
Running
| """ | |
| Method Registry for Knowledge Extraction Methods | |
| This module provides a centralized registry for all available knowledge extraction methods | |
| and their associated schemas. Each method is bound to a specific schema type. | |
| """ | |
| from enum import Enum | |
| from typing import Any, Dict, List, Optional | |
class MethodType(Enum):
    """Category of a knowledge extraction method.

    Every registry entry carries one of these members under its
    ``"method_type"`` key.
    """

    # Methods registered as production extractors.
    PRODUCTION = "production"
    # Methods registered as baselines.
    BASELINE = "baseline"
class SchemaType(Enum):
    """Kind of schema an extraction method is bound to.

    Every registry entry carries one of these members under its
    ``"schema_type"`` key.
    """

    # Schema used by the entries registered as production methods.
    REFERENCE_BASED = "reference_based"
    # Schema used by the entries registered as baseline methods.
    DIRECT_BASED = "direct_based"
# Method Registry - maps method names to their implementations and schemas.
#
# Each value is a metadata dict with the keys:
#   name, description   - human-readable label and summary
#   method_type         - MethodType member
#   schema_type         - SchemaType member
#   module_path         - dotted path of the module holding the implementation
#   class_name          - name of the class/factory inside that module
#   supported_features  - list of feature tags
#   processing_type     - how the method is invoked ("direct_call" / "async_crew")
#
# Keys must be unique: a repeated key in a dict literal silently overwrites
# the earlier entry instead of raising an error.
AVAILABLE_METHODS = {
    # Production method: OpenAI Structured Outputs (simple, direct_call)
    "openai_structured": {
        "name": "OpenAI Structured Outputs",
        "description": "Simple OpenAI structured outputs extractor using Pydantic models",
        "method_type": MethodType.PRODUCTION,
        "schema_type": SchemaType.REFERENCE_BASED,
        "module_path": "agentgraph.methods.production.openai_structured_extractor",
        "class_name": "OpenAIStructuredFactory",
        "supported_features": ["structured_outputs", "direct_extraction"],
        "processing_type": "direct_call",
    },
    # Production method using reference-based schema
    "production": {
        "name": "Multi-Agent Knowledge Extractor",
        "description": "Production CrewAI-based multi-agent system with content reference resolution",
        "method_type": MethodType.PRODUCTION,
        "schema_type": SchemaType.REFERENCE_BASED,
        "module_path": "agentgraph.methods.production.multi_agent_knowledge_extractor",
        "class_name": "agent_monitoring_crew_factory",
        "supported_features": ["content_references", "failure_detection", "line_numbers"],
        "processing_type": "async_crew",
    },
    # Baseline methods using direct-based schema
    "original_method": {
        "name": "Original Method",
        "description": "Original baseline extraction method",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.original_method",
        "class_name": "OriginalKnowledgeExtractionMethod",
        "supported_features": ["direct_extraction"],
        "processing_type": "direct_call",
    },
    "clustering_method": {
        "name": "Clustering Method",
        "description": "Clustering-based extraction method",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.clustering_method",
        "class_name": "ClusteringKnowledgeExtractionMethod",
        "supported_features": ["direct_extraction", "clustering"],
        "processing_type": "direct_call",
    },
    "direct_llm_method": {
        "name": "Direct LLM Method",
        "description": "Direct LLM-based extraction method",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.direct_llm_method",
        "class_name": "DirectLLMKnowledgeExtractor",
        "supported_features": ["direct_extraction", "llm"],
        "processing_type": "direct_call",
    },
    "hybrid_method": {
        "name": "Hybrid Method",
        "description": "Hybrid extraction combining multiple approaches",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.hybrid_method",
        "class_name": "HybridKnowledgeExtractionMethod",
        "supported_features": ["direct_extraction", "hybrid"],
        "processing_type": "direct_call",
    },
    "pydantic_method": {
        "name": "Pydantic Method",
        "description": "Pydantic-based extraction method",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.pydantic_method",
        "class_name": "PydanticKnowledgeExtractor",
        "supported_features": ["direct_extraction", "pydantic"],
        "processing_type": "direct_call",
    },
    "unified_method": {
        "name": "Unified Method",
        "description": "Unified extraction method",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.unified_method",
        "class_name": "UnifiedKnowledgeExtractionMethod",
        "supported_features": ["direct_extraction", "unified"],
        "processing_type": "direct_call",
    },
    "openai_agent": {
        "name": "OpenAI Agent",
        "description": "OpenAI Agent with function tools and validation",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.openai_agent",
        "class_name": "OpenAIAgentKnowledgeExtractor",
        "supported_features": ["direct_extraction", "pipeline", "validation_improvement", "graph_enhancement"],
        "processing_type": "direct_call",
    },
    # NOTE(review): the next two entries reuse the module_path and class_name
    # of "pydantic_method"; confirm this is intentional and not a copy-paste
    # leftover.
    "sequential_pydantic": {
        "name": "Sequential Pydantic",
        "description": "Sequential Pydantic-based extraction method",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.pydantic_method",
        "class_name": "PydanticKnowledgeExtractor",
        "supported_features": ["direct_extraction", "pydantic", "sequential"],
        "processing_type": "direct_call",
    },
    "pydantic_hybrid_method": {
        "name": "Pydantic Hybrid Method",
        "description": "Hybrid Pydantic-based extraction method",
        "method_type": MethodType.BASELINE,
        "schema_type": SchemaType.DIRECT_BASED,
        "module_path": "agentgraph.methods.baseline.pydantic_method",
        "class_name": "PydanticKnowledgeExtractor",
        "supported_features": ["direct_extraction", "pydantic", "hybrid"],
        "processing_type": "direct_call",
    },
    # rule_based_method removed due to import errors
}

# Default method configuration: key of the AVAILABLE_METHODS entry to use
# when no method is specified.
DEFAULT_METHOD = "openai_structured"
def get_available_methods() -> Dict[str, Dict[str, Any]]:
    """Return a new dict holding every registered method and its metadata.

    The copy is shallow: the top-level mapping is new, but each per-method
    metadata dict is the same object stored in the registry.
    """
    registry_snapshot = dict(AVAILABLE_METHODS)
    return registry_snapshot
def get_method_info(method_name: str) -> Optional[Dict[str, Any]]:
    """Look up the metadata dict registered under ``method_name``.

    Returns ``None`` when no method with that name is registered.
    """
    try:
        return AVAILABLE_METHODS[method_name]
    except KeyError:
        return None
def get_methods_by_type(method_type: MethodType) -> Dict[str, Dict[str, Any]]:
    """Return the registry entries whose ``"method_type"`` equals ``method_type``.

    The result maps method key to its metadata dict, in registry order.
    """
    matching: Dict[str, Dict[str, Any]] = {}
    for method_key, metadata in AVAILABLE_METHODS.items():
        if metadata["method_type"] == method_type:
            matching[method_key] = metadata
    return matching
def get_methods_by_schema(schema_type: SchemaType) -> Dict[str, Dict[str, Any]]:
    """Return the registry entries whose ``"schema_type"`` equals ``schema_type``.

    The result maps method key to its metadata dict, in registry order.
    """
    matching: Dict[str, Dict[str, Any]] = {}
    for method_key, metadata in AVAILABLE_METHODS.items():
        if metadata["schema_type"] == schema_type:
            matching[method_key] = metadata
    return matching
def get_schema_for_method(method_name: str) -> Optional[SchemaType]:
    """Return the schema type bound to ``method_name``.

    Returns ``None`` when the method lookup yields nothing.
    """
    metadata = get_method_info(method_name)
    if not metadata:
        return None
    return metadata["schema_type"]
def is_valid_method(method_name: str) -> bool:
    """Tell whether ``method_name`` is a key of the method registry."""
    registered_names = AVAILABLE_METHODS.keys()
    return method_name in registered_names
def get_method_names() -> List[str]:
    """Return the keys of every registered method, in registry order."""
    return [*AVAILABLE_METHODS]
def get_production_methods() -> List[str]:
    """Return the keys of all methods whose type is ``MethodType.PRODUCTION``."""
    production_keys: List[str] = []
    for method_key, metadata in AVAILABLE_METHODS.items():
        if metadata["method_type"] == MethodType.PRODUCTION:
            production_keys.append(method_key)
    return production_keys
def get_baseline_methods() -> List[str]:
    """Return the keys of all methods whose type is ``MethodType.BASELINE``."""
    baseline_keys: List[str] = []
    for method_key, metadata in AVAILABLE_METHODS.items():
        if metadata["method_type"] == MethodType.BASELINE:
            baseline_keys.append(method_key)
    return baseline_keys
def get_method_display_name(method_name: str) -> str:
    """Return the human-readable name registered for ``method_name``.

    Falls back to ``method_name`` itself when the method lookup yields
    nothing.
    """
    metadata = get_method_info(method_name)
    if not metadata:
        return method_name
    return metadata["name"]
def get_method_description(method_name: str) -> str:
    """Return the description registered for ``method_name``.

    Returns an empty string when the method lookup yields nothing.
    """
    metadata = get_method_info(method_name)
    if not metadata:
        return ""
    return metadata["description"]
def validate_method_schema_compatibility(method_name: str, expected_schema: SchemaType) -> bool:
    """Check that ``method_name`` is bound to ``expected_schema``.

    Returns ``False`` when no schema can be resolved for the method;
    otherwise returns whether the resolved schema equals ``expected_schema``.
    """
    resolved_schema = get_schema_for_method(method_name)
    if not resolved_schema:
        return False
    return resolved_schema == expected_schema