Update document_processor.py
Browse files- document_processor.py +2 -2
document_processor.py
CHANGED
|
@@ -7,7 +7,7 @@ from utils import clean_text, setup_logger
|
|
| 7 |
|
| 8 |
logger = setup_logger('document_processor')
|
| 9 |
|
| 10 |
-
def split_into_chunks(text, chunk_size=500, overlap=75):
|
| 11 |
"""
|
| 12 |
Split text into overlapping chunks
|
| 13 |
|
|
@@ -55,7 +55,7 @@ def split_into_chunks(text, chunk_size=500, overlap=75):
|
|
| 55 |
|
| 56 |
return df
|
| 57 |
|
| 58 |
-
def load_single_document(file_path, chunk_size=
|
| 59 |
"""
|
| 60 |
Load a single document and split it into chunks
|
| 61 |
|
|
|
|
| 7 |
|
| 8 |
logger = setup_logger('document_processor')
|
| 9 |
|
| 10 |
+
def split_into_chunks(text, chunk_size=400, overlap=100):
|
| 11 |
"""
|
| 12 |
Split text into overlapping chunks
|
| 13 |
|
|
|
|
| 55 |
|
| 56 |
return df
|
| 57 |
|
| 58 |
+
def load_single_document(file_path, chunk_size=400, overlap=100):
|
| 59 |
"""
|
| 60 |
Load a single document and split it into chunks
|
| 61 |
|