File size: 2,405 Bytes
5374a2d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
from typing import Dict, Any
from llama_index.core.embeddings import BaseEmbedding
from .base import BaseChunker, ChunkingStrategy
from .simple_chunker import SimpleChunker
from .semantic_chunker import SemanticChunker
from .hierachical_chunker import HierarchicalChunker
from evoagentx.core.logging import logger
# Public names exposed by a star-import of this module.
__all__ = [
    'SimpleChunker',
    'SemanticChunker',
    'HierarchicalChunker',
    'ChunkFactory',
    'BaseChunker',
]
class ChunkFactory:
    """Build chunker instances from a strategy plus an optional config dict."""

    def create(
        self,
        strategy: ChunkingStrategy,
        embed_model: BaseEmbedding = None,
        chunker_config: Dict[str, Any] = None
    ) -> BaseChunker:
        """Instantiate the chunker that matches ``strategy``.

        Settings are looked up in ``chunker_config``; any key that is absent
        falls back to the default listed below.

        * ``ChunkingStrategy.SIMPLE``: ``chunk_size`` (1024),
          ``chunk_overlap`` (20), ``max_workers`` (2).
        * ``ChunkingStrategy.SEMANTIC``: ``similarity_threshold`` (0.7),
          ``max_workers`` (2); additionally needs ``embed_model``.
        * ``ChunkingStrategy.HIERARCHICAL``: ``chunk_sizes``
          ([2048, 512, 128]), ``chunk_overlap`` (20).

        Args:
            strategy (ChunkingStrategy): Which kind of chunker to build.
            embed_model (BaseEmbedding, optional): Embedding model; only the
                semantic strategy uses it.
            chunker_config (Dict[str, Any], optional): Per-chunker settings.
                ``None`` or an empty dict means "use every default".

        Returns:
            BaseChunker: The newly constructed chunker.

        Raises:
            ValueError: If the semantic strategy is requested without an
                embed model, or if ``strategy`` matches none of the
                supported strategies.
        """
        settings = chunker_config if chunker_config else {}

        if strategy == ChunkingStrategy.SIMPLE:
            size = settings.get("chunk_size", 1024)
            overlap = settings.get("chunk_overlap", 20)
            workers = settings.get("max_workers", 2)
            built = SimpleChunker(
                chunk_size=size,
                chunk_overlap=overlap,
                max_workers=workers,
            )
        elif strategy == ChunkingStrategy.SEMANTIC:
            if embed_model:
                threshold = settings.get("similarity_threshold", 0.7)
                workers = settings.get("max_workers", 2)
                built = SemanticChunker(
                    embed_model=embed_model,
                    similarity_threshold=threshold,
                    max_workers=workers,
                )
            else:
                # The semantic branch cannot proceed without an embed model.
                raise ValueError("Embed model required for semantic chunking")
        elif strategy == ChunkingStrategy.HIERARCHICAL:
            sizes = settings.get("chunk_sizes", [2048, 512, 128])
            overlap = settings.get("chunk_overlap", 20)
            built = HierarchicalChunker(chunk_sizes=sizes, chunk_overlap=overlap)
        else:
            raise ValueError(f"Unsupported chunking strategy: {strategy}")

        logger.info(f"Created chunker for strategy: {strategy}")
        return built