MrSimple07 committed on
Commit
2676cd6
·
1 Parent(s): f1379ba

chunk size = 2048 + rows=15

Browse files
Files changed (1) hide show
  1. documents_prep.py +3 -3
documents_prep.py CHANGED
@@ -6,14 +6,14 @@ from llama_index.core import Document
6
  from llama_index.core.text_splitter import SentenceSplitter
7
  from my_logging import log_message
8
 
9
- # Configuration
10
  CHUNK_SIZE = 2048
11
  CHUNK_OVERLAP = 128
12
 
13
  def chunk_text_documents(documents):
14
  text_splitter = SentenceSplitter(
15
  chunk_size=CHUNK_SIZE,
16
- chunk_overlap=CHUNK_OVERLAP
 
17
  )
18
 
19
  chunked = []
@@ -23,7 +23,7 @@ def chunk_text_documents(documents):
23
  chunk.metadata.update({
24
  'chunk_id': i,
25
  'total_chunks': len(chunks),
26
- 'chunk_size': len(chunk.text) # Add chunk size
27
  })
28
  chunked.append(chunk)
29
 
 
6
  from llama_index.core.text_splitter import SentenceSplitter
7
  from my_logging import log_message
8
 
 
9
  CHUNK_SIZE = 2048
10
  CHUNK_OVERLAP = 128
11
 
12
  def chunk_text_documents(documents):
13
  text_splitter = SentenceSplitter(
14
  chunk_size=CHUNK_SIZE,
15
+ chunk_overlap=CHUNK_OVERLAP,
16
+ metadata_chunk_size=2048 # ADD THIS LINE - match or exceed chunk_size
17
  )
18
 
19
  chunked = []
 
23
  chunk.metadata.update({
24
  'chunk_id': i,
25
  'total_chunks': len(chunks),
26
+ 'chunk_size': len(chunk.text)
27
  })
28
  chunked.append(chunk)
29