from typing import Any, Dict, Optional

from llama_index.core.embeddings import BaseEmbedding

from evoagentx.core.logging import logger

from .base import BaseChunker, ChunkingStrategy
from .simple_chunker import SimpleChunker
from .semantic_chunker import SemanticChunker
from .hierachical_chunker import HierarchicalChunker
# Names exported by a star-import of this module.
__all__ = [
    "SimpleChunker",
    "SemanticChunker",
    "HierarchicalChunker",
    "ChunkFactory",
    "BaseChunker",
]
class ChunkFactory:
    """Factory that builds a chunker for a given ``ChunkingStrategy``."""

    def create(
        self,
        strategy: ChunkingStrategy,
        embed_model: Optional[BaseEmbedding] = None,
        chunker_config: Optional[Dict[str, Any]] = None,
    ) -> BaseChunker:
        """Create a chunker based on strategy and configuration.

        Only the keys relevant to the selected strategy are read from
        ``chunker_config``; any other keys are ignored. Missing keys fall
        back to the defaults listed below.

        Args:
            strategy (ChunkingStrategy): The chunking strategy.
            embed_model (BaseEmbedding, optional): Embedding model. Required
                for ``ChunkingStrategy.SEMANTIC``; not used by the other
                strategies.
            chunker_config (Dict[str, Any], optional): Chunker configuration.
                Keys read per strategy (with defaults):

                * SIMPLE: ``chunk_size`` (1024), ``chunk_overlap`` (20),
                  ``max_workers`` (2).
                * SEMANTIC: ``similarity_threshold`` (0.7),
                  ``max_workers`` (2).
                * HIERARCHICAL: ``chunk_sizes`` ([2048, 512, 128]),
                  ``chunk_overlap`` (20).

        Returns:
            BaseChunker: A chunker instance.

        Raises:
            ValueError: If ``strategy`` is not one of the supported
                strategies, or if semantic chunking is requested without
                an ``embed_model``.
        """
        # A missing (or empty) config means "use every default".
        chunker_config = chunker_config or {}

        if strategy == ChunkingStrategy.SIMPLE:
            chunker = SimpleChunker(
                chunk_size=chunker_config.get("chunk_size", 1024),
                chunk_overlap=chunker_config.get("chunk_overlap", 20),
                max_workers=chunker_config.get("max_workers", 2),
            )
        elif strategy == ChunkingStrategy.SEMANTIC:
            # Check for absence explicitly: a truthiness test would also
            # reject a valid model object that happens to evaluate as falsy.
            if embed_model is None:
                raise ValueError("Embed model required for semantic chunking")
            chunker = SemanticChunker(
                embed_model=embed_model,
                similarity_threshold=chunker_config.get("similarity_threshold", 0.7),
                max_workers=chunker_config.get("max_workers", 2),
            )
        elif strategy == ChunkingStrategy.HIERARCHICAL:
            # The default list literal is built on each call, so it is never
            # shared between chunker instances.
            chunker = HierarchicalChunker(
                chunk_sizes=chunker_config.get("chunk_sizes", [2048, 512, 128]),
                chunk_overlap=chunker_config.get("chunk_overlap", 20),
            )
        else:
            raise ValueError(f"Unsupported chunking strategy: {strategy}")

        logger.info(f"Created chunker for strategy: {strategy}")
        return chunker