Spaces:
Sleeping
Sleeping
Commit ·
15a7dee
1
Parent(s): 4a17e89
max chunk size = 4000, max rows = 5
Browse files
- documents_prep.py +1 -1
- table_prep.py +0 -4
documents_prep.py
CHANGED
|
@@ -7,7 +7,7 @@ from llama_index.core.text_splitter import SentenceSplitter
|
|
| 7 |
from my_logging import log_message
|
| 8 |
|
| 9 |
# Configuration
|
| 10 |
-
CHUNK_SIZE =
|
| 11 |
CHUNK_OVERLAP = 128
|
| 12 |
|
| 13 |
def chunk_text_documents(documents):
|
|
|
|
| 7 |
from my_logging import log_message
|
| 8 |
|
| 9 |
# Configuration
|
| 10 |
+
CHUNK_SIZE = 1024
|
| 11 |
CHUNK_OVERLAP = 128
|
| 12 |
|
| 13 |
def chunk_text_documents(documents):
|
table_prep.py
CHANGED
|
@@ -32,10 +32,6 @@ def create_table_content(table_data):
|
|
| 32 |
|
| 33 |
return content
|
| 34 |
|
| 35 |
-
from llama_index.core.text_splitter import SentenceSplitter
|
| 36 |
-
from config import CHUNK_SIZE, CHUNK_OVERLAP
|
| 37 |
-
|
| 38 |
-
|
| 39 |
def chunk_table_document(doc, max_chunk_size=MAX_CHUNK_SIZE, max_rows_per_chunk=MAX_ROWS_PER_CHUNK):
|
| 40 |
lines = doc.text.strip().split('\n')
|
| 41 |
|
|
|
|
| 32 |
|
| 33 |
return content
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
def chunk_table_document(doc, max_chunk_size=MAX_CHUNK_SIZE, max_rows_per_chunk=MAX_ROWS_PER_CHUNK):
|
| 36 |
lines = doc.text.strip().split('\n')
|
| 37 |
|