Update content_summary in chunking
Browse files - app/law_document_chunker.py (+13 -3)
app/law_document_chunker.py
CHANGED
|
@@ -155,8 +155,14 @@ class LawDocumentChunker:
|
|
| 155 |
metadata.sub_clause_letter = level_value
|
| 156 |
|
| 157 |
# Điền metadata từ parent chunks nếu có
|
|
|
|
| 158 |
if chunk_stack and parent_id:
|
| 159 |
self._fill_metadata_from_parents(metadata, chunk_stack, parent_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
return metadata
|
| 162 |
|
|
@@ -208,10 +214,14 @@ class LawDocumentChunker:
|
|
| 208 |
for i in range(parent_index - 1, -1, -1):
|
| 209 |
chunk_id, level, level_value, content = chunk_stack[i]
|
| 210 |
|
| 211 |
-
# Tìm tất cả chunks Điều
|
| 212 |
-
if level
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
ancestors.append((level, level_value, content))
|
| 214 |
-
logger.debug(f"[CHUNKER] Found ancestor: {
|
| 215 |
|
| 216 |
logger.debug(f"[CHUNKER] Found {len(ancestors)} ancestors: {[(level, value) for level, value, content in ancestors]}")
|
| 217 |
|
|
|
|
| 155 |
metadata.sub_clause_letter = level_value
|
| 156 |
|
| 157 |
# Điền metadata từ parent chunks nếu có
|
| 158 |
+
logger.debug(f"[CHUNKER] Creating chunk with level: {level}, parent_id: {parent_id}, stack_size: {len(chunk_stack)}")
|
| 159 |
if chunk_stack and parent_id:
|
| 160 |
self._fill_metadata_from_parents(metadata, chunk_stack, parent_id)
|
| 161 |
+
else:
|
| 162 |
+
logger.debug(f"[CHUNKER] Skipping metadata fill - no parent_id or empty stack")
|
| 163 |
+
|
| 164 |
+
# Debug final metadata
|
| 165 |
+
logger.debug(f"[CHUNKER] Final metadata for chunk {chunk_id[:8]}... - Level: {level}, Article: {metadata.article_number}, Clause: {metadata.clause_number}, Point: {metadata.sub_clause_letter}")
|
| 166 |
|
| 167 |
return metadata
|
| 168 |
|
|
|
|
| 214 |
for i in range(parent_index - 1, -1, -1):
|
| 215 |
chunk_id, level, level_value, content = chunk_stack[i]
|
| 216 |
|
| 217 |
+
# Tìm tất cả chunks Điều xuất hiện trước chunk hiện tại
|
| 218 |
+
if level == "DIEU":
|
| 219 |
+
ancestors.append((level, level_value, content))
|
| 220 |
+
logger.debug(f"[CHUNKER] Found DIEU ancestor: {level_value}")
|
| 221 |
+
# Tìm chunks Khoản chỉ nếu chunk hiện tại là Điểm
|
| 222 |
+
elif level == "KHOAN" and current_level == "DIEM":
|
| 223 |
ancestors.append((level, level_value, content))
|
| 224 |
+
logger.debug(f"[CHUNKER] Found KHOAN ancestor: {level_value}")
|
| 225 |
|
| 226 |
logger.debug(f"[CHUNKER] Found {len(ancestors)} ancestors: {[(level, value) for level, value, content in ancestors]}")
|
| 227 |
|