Spaces:
Sleeping
Sleeping
Commit
·
68ff9c7
1
Parent(s):
d490230
Added new window for chunking results + added hybrid approach for chunking; max limit is 2048
Browse files
- documents_prep.py +3 -3
documents_prep.py
CHANGED
|
@@ -226,12 +226,12 @@ def load_json_documents(repo_id, hf_token, json_files_dir, download_dir):
|
|
| 226 |
log_message(f"Всего создано {len(all_documents)} исходных документов")
|
| 227 |
log_message(f"После chunking получено {len(chunked_documents)} чанков")
|
| 228 |
|
| 229 |
-
|
|
|
|
| 230 |
|
| 231 |
except Exception as e:
|
| 232 |
log_message(f"Ошибка загрузки JSON документов: {str(e)}")
|
| 233 |
-
return []
|
| 234 |
-
|
| 235 |
|
| 236 |
def extract_section_title(section_text):
|
| 237 |
if not section_text.strip():
|
|
|
|
| 226 |
log_message(f"Всего создано {len(all_documents)} исходных документов")
|
| 227 |
log_message(f"После chunking получено {len(chunked_documents)} чанков")
|
| 228 |
|
| 229 |
+
# Return only chunked_documents, not the tuple
|
| 230 |
+
return chunked_documents
|
| 231 |
|
| 232 |
except Exception as e:
|
| 233 |
log_message(f"Ошибка загрузки JSON документов: {str(e)}")
|
| 234 |
+
return []
|
|
|
|
| 235 |
|
| 236 |
def extract_section_title(section_text):
|
| 237 |
if not section_text.strip():
|