Spaces:
Sleeping
Sleeping
Commit ·
5f6b6af
1
Parent(s): 8c371f8
max chunk size= 4000 + max row = 5
Browse files- documents_prep.py +1 -1
documents_prep.py
CHANGED
|
@@ -38,7 +38,7 @@ def chunk_text_documents(documents):
|
|
| 38 |
return chunked
|
| 39 |
|
| 40 |
|
| 41 |
-
def chunk_table_by_content(table_data, doc_id, max_chars=
|
| 42 |
"""Chunk tables by content size AND row count"""
|
| 43 |
headers = table_data.get('headers', [])
|
| 44 |
rows = table_data.get('data', [])
|
|
|
|
| 38 |
return chunked
|
| 39 |
|
| 40 |
|
| 41 |
+
def chunk_table_by_content(table_data, doc_id, max_chars=2500, max_rows=10):
|
| 42 |
"""Chunk tables by content size AND row count"""
|
| 43 |
headers = table_data.get('headers', [])
|
| 44 |
rows = table_data.get('data', [])
|