from abc import ABC, abstractmethod
from enum import Enum
from typing import List

from evoagentx.rag.schema import Corpus, Document


class ChunkingStrategy(str, Enum):
    """Identifiers for the available chunking strategies.

    Members also subclass ``str``, so each one compares equal to its plain
    string value (for example ``ChunkingStrategy.SIMPLE == "simple"``).
    """

    SIMPLE = "simple"
    SEMANTIC = "semantic"
    HIERARCHICAL = "hierarchical"


class BaseChunker(ABC):
    """Interface that every document chunker must implement.

    A chunker is the step of the RAG pipeline that takes Documents and
    turns them into a Corpus of Chunks. This base class holds no logic of
    its own; concrete subclasses supply it by overriding :meth:`chunk`.
    """

    @abstractmethod
    def chunk(self, documents: List[Document], **kwargs) -> Corpus:
        """Split the given documents into a Corpus of Chunks.

        Args:
            documents (List[Document]): The Document objects to be chunked.
            **kwargs: Extra, strategy-specific options.

        Returns:
            Corpus: The collection of Chunk objects produced.
        """
        # Abstract placeholder: subclasses provide the implementation.
        ...