docling-rag / src /chunking_utils.py
unikill066's picture
Upload 23 files
2b717c9 verified
raw
history blame contribute delete
398 Bytes
import tiktoken
from docling_core.transforms.chunker.tokenizer.openai import OpenAITokenizer
from docling.chunking import HybridChunker
from config import MAX_TOKENS, MODEL_NAME
def create_chunker():
    """Build and return the document chunker used by this project.

    The tiktoken encoding for ``MODEL_NAME`` is looked up, wrapped in an
    ``OpenAITokenizer`` limited to ``MAX_TOKENS``, and handed to a
    ``HybridChunker`` that is also given ``MAX_TOKENS``.

    Returns:
        The configured ``HybridChunker`` instance.
    """
    # Resolve the model-specific encoding first so each step is named.
    encoding = tiktoken.encoding_for_model(MODEL_NAME)
    openai_tokenizer = OpenAITokenizer(tokenizer=encoding, max_tokens=MAX_TOKENS)

    # NOTE(review): max_tokens is supplied to both the tokenizer and the
    # chunker; confirm whether the chunker-level argument is still needed
    # for the installed docling version.
    chunker = HybridChunker(tokenizer=openai_tokenizer, max_tokens=MAX_TOKENS)
    return chunker