VietCat committed on
Commit
f793012
·
1 Parent(s): 72d54ca

update content_summary in chunking

Browse files
Files changed (1) hide show
  1. app/law_document_chunker.py +13 -3
app/law_document_chunker.py CHANGED
@@ -155,8 +155,14 @@ class LawDocumentChunker:
155
  metadata.sub_clause_letter = level_value
156
 
157
  # Điền metadata từ parent chunks nếu có
 
158
  if chunk_stack and parent_id:
159
  self._fill_metadata_from_parents(metadata, chunk_stack, parent_id)
 
 
 
 
 
160
 
161
  return metadata
162
 
@@ -208,10 +214,14 @@ class LawDocumentChunker:
208
  for i in range(parent_index - 1, -1, -1):
209
  chunk_id, level, level_value, content = chunk_stack[i]
210
 
211
- # Tìm tất cả chunks Điều/Khoản xuất hiện trước chunk hiện tại
212
- if level in ["DIEU", "KHOAN"]:
 
 
 
 
213
  ancestors.append((level, level_value, content))
214
- logger.debug(f"[CHUNKER] Found ancestor: {level} {level_value}")
215
 
216
  logger.debug(f"[CHUNKER] Found {len(ancestors)} ancestors: {[(level, value) for level, value, content in ancestors]}")
217
 
 
155
  metadata.sub_clause_letter = level_value
156
 
157
  # Điền metadata từ parent chunks nếu có
158
+ logger.debug(f"[CHUNKER] Creating chunk with level: {level}, parent_id: {parent_id}, stack_size: {len(chunk_stack)}")
159
  if chunk_stack and parent_id:
160
  self._fill_metadata_from_parents(metadata, chunk_stack, parent_id)
161
+ else:
162
+ logger.debug(f"[CHUNKER] Skipping metadata fill - no parent_id or empty stack")
163
+
164
+ # Debug final metadata
165
+ logger.debug(f"[CHUNKER] Final metadata for chunk {chunk_id[:8]}... - Level: {level}, Article: {metadata.article_number}, Clause: {metadata.clause_number}, Point: {metadata.sub_clause_letter}")
166
 
167
  return metadata
168
 
 
214
  for i in range(parent_index - 1, -1, -1):
215
  chunk_id, level, level_value, content = chunk_stack[i]
216
 
217
+ # Tìm tất cả chunks Điều xuất hiện trước chunk hiện tại
218
+ if level == "DIEU":
219
+ ancestors.append((level, level_value, content))
220
+ logger.debug(f"[CHUNKER] Found DIEU ancestor: {level_value}")
221
+ # Tìm chunks Khoản chỉ nếu chunk hiện tại là Điểm
222
+ elif level == "KHOAN" and current_level == "DIEM":
223
  ancestors.append((level, level_value, content))
224
+ logger.debug(f"[CHUNKER] Found KHOAN ancestor: {level_value}")
225
 
226
  logger.debug(f"[CHUNKER] Found {len(ancestors)} ancestors: {[(level, value) for level, value, content in ancestors]}")
227