Spaces:
Sleeping
Sleeping
Commit
·
54b1e69
1
Parent(s):
c354c08
chunk size = 2048 + rows=15
Browse files
- documents_prep.py +2 -2
documents_prep.py
CHANGED
|
@@ -7,7 +7,7 @@ from llama_index.core.text_splitter import SentenceSplitter
|
|
| 7 |
from my_logging import log_message
|
| 8 |
|
| 9 |
# Configuration
|
| 10 |
-
CHUNK_SIZE =
|
| 11 |
CHUNK_OVERLAP = 128
|
| 12 |
|
| 13 |
def chunk_text_documents(documents):
|
|
@@ -38,7 +38,7 @@ def chunk_text_documents(documents):
|
|
| 38 |
return chunked
|
| 39 |
|
| 40 |
|
| 41 |
-
def chunk_table_by_rows(table_data, doc_id, rows_per_chunk=
|
| 42 |
"""
|
| 43 |
Chunk tables by rows with fallback to character limit.
|
| 44 |
Keeps 3-4 rows together, but splits individual rows if they're too large.
|
|
|
|
| 7 |
from my_logging import log_message
|
| 8 |
|
| 9 |
# Configuration
|
| 10 |
+
CHUNK_SIZE = 2048
|
| 11 |
CHUNK_OVERLAP = 128
|
| 12 |
|
| 13 |
def chunk_text_documents(documents):
|
|
|
|
| 38 |
return chunked
|
| 39 |
|
| 40 |
|
| 41 |
+
def chunk_table_by_rows(table_data, doc_id, rows_per_chunk=15, max_chars=2000):
|
| 42 |
"""
|
| 43 |
Chunk tables by rows with fallback to character limit.
|
| 44 |
Keeps 3-4 rows together, but splits individual rows if they're too large.
|