File size: 894 Bytes
5374a2d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
from enum import Enum
from typing import List
from abc import ABC, abstractmethod
from evoagentx.rag.schema import Document, Corpus
class ChunkingStrategy(str, Enum):
    """Closed set of chunking strategy identifiers.

    Members also inherit from ``str``, so each one compares equal to its
    plain string value (e.g. ``ChunkingStrategy.SIMPLE == "simple"``) and a
    member can be looked up from that string with ``ChunkingStrategy("simple")``.
    """

    # NOTE(review): what each strategy actually does is defined by the
    # chunker implementations, which are not visible in this module.
    SIMPLE = "simple"
    SEMANTIC = "semantic"
    HIERARCHICAL = "hierarchical"
class BaseChunker(ABC):
    """Abstract base class for chunking documents into smaller segments.

    This class defines the interface for chunking strategies in the RAG
    pipeline, converting Documents into a Corpus of Chunks. It cannot be
    instantiated directly; concrete chunkers must override :meth:`chunk`.
    """

    @abstractmethod
    def chunk(self, documents: List[Document], **kwargs) -> Corpus:
        """Chunk documents into a Corpus of Chunks.

        The base implementation has no logic of its own and returns ``None``
        if a subclass invokes it via ``super()``.

        Args:
            documents (List[Document]): List of Document objects to chunk.
            **kwargs: Additional parameters specific to the chunking strategy.

        Returns:
            Corpus: A collection of Chunk objects.
        """