MrSimple07 committed on
Commit
54b1e69
·
1 Parent(s): c354c08

chunk size = 2048 + rows=15

Browse files
Files changed (1) hide show
  1. documents_prep.py +2 -2
documents_prep.py CHANGED
@@ -7,7 +7,7 @@ from llama_index.core.text_splitter import SentenceSplitter
7
  from my_logging import log_message
8
 
9
  # Configuration
10
- CHUNK_SIZE = 1500
11
  CHUNK_OVERLAP = 128
12
 
13
  def chunk_text_documents(documents):
@@ -38,7 +38,7 @@ def chunk_text_documents(documents):
38
  return chunked
39
 
40
 
41
- def chunk_table_by_rows(table_data, doc_id, rows_per_chunk=20, max_chars=2000):
42
  """
43
  Chunk tables by rows with fallback to character limit.
44
  Keeps 3-4 rows together, but splits individual rows if they're too large.
 
7
  from my_logging import log_message
8
 
9
  # Configuration
10
+ CHUNK_SIZE = 2048
11
  CHUNK_OVERLAP = 128
12
 
13
  def chunk_text_documents(documents):
 
38
  return chunked
39
 
40
 
41
+ def chunk_table_by_rows(table_data, doc_id, rows_per_chunk=15, max_chars=2000):
42
  """
43
  Chunk tables by rows with fallback to character limit.
44
  Keeps 3-4 rows together, but splits individual rows if they're too large.