VietCat committed on
Commit
b5d3237
·
1 Parent(s): 811b7b0

fix metadata

Browse files
Files changed (1) hide show
  1. app/law_document_chunker.py +13 -0
app/law_document_chunker.py CHANGED
@@ -131,6 +131,15 @@ class LawDocumentChunker:
131
  logger.error(f"[CHUNKER] Error in _detect_structure_level for line '{line}': {e}")
132
  return "CONTENT", None, None
133
 
 
 
 
 
 
 
 
 
 
134
  def _create_chunk_metadata(self, content: str, level: str, level_value: Optional[str],
135
  parent_id: Optional[str], vanbanid: int,
136
  document_title: str, chunk_stack: List[Tuple[str, str, Optional[str], str]], chunk_dict: dict) -> 'ChunkMetadata':
@@ -157,6 +166,10 @@ class LawDocumentChunker:
157
  self._fill_metadata_from_parents(metadata, parent_id, chunk_dict)
158
  else:
159
  logger.debug(f"[CHUNKER] Skipping metadata fill - no parent_id or chunk_dict")
 
 
 
 
160
  logger.debug(f"[CHUNKER] Final metadata for chunk {chunk_id[:8]}... - Level: {level}, Article: {metadata.article_number}, Clause: {metadata.clause_number}, Point: {metadata.sub_clause_letter}")
161
  return metadata
162
 
 
131
  logger.error(f"[CHUNKER] Error in _detect_structure_level for line '{line}': {e}")
132
  return "CONTENT", None, None
133
 
134
def _build_structure_summary(self, article_number, clause_number, sub_clause_letter):
    """Return the legal citation label for a position in the document.

    The label is assembled from the article outward to the most specific
    level available, e.g. ``"Điểm a Khoản 2 Điều 5"`` for a point inside a
    clause inside an article. A level is included only when every level
    above it is also present: a clause needs an article, and a point needs
    both a clause and an article.

    Args:
        article_number: Article identifier; falsy when unknown.
        clause_number: Clause identifier; falsy when unknown.
        sub_clause_letter: Point letter; falsy when unknown.

    Returns:
        The citation string, or an empty string when no article number is
        available.
    """
    # Without an article there is nothing to anchor the citation to.
    if not article_number:
        return ""

    label = f"Điều {article_number}"
    if not clause_number:
        # A point letter without its clause is ignored.
        return label

    label = f"Khoản {clause_number} {label}"
    if not sub_clause_letter:
        return label

    return f"Điểm {sub_clause_letter} {label}"
142
+
143
  def _create_chunk_metadata(self, content: str, level: str, level_value: Optional[str],
144
  parent_id: Optional[str], vanbanid: int,
145
  document_title: str, chunk_stack: List[Tuple[str, str, Optional[str], str]], chunk_dict: dict) -> 'ChunkMetadata':
 
166
  self._fill_metadata_from_parents(metadata, parent_id, chunk_dict)
167
  else:
168
  logger.debug(f"[CHUNKER] Skipping metadata fill - no parent_id or chunk_dict")
169
+ # Gán context_summary theo format pháp lý
170
+ metadata.context_summary = self._build_structure_summary(
171
+ metadata.article_number, metadata.clause_number, metadata.sub_clause_letter
172
+ )
173
  logger.debug(f"[CHUNKER] Final metadata for chunk {chunk_id[:8]}... - Level: {level}, Article: {metadata.article_number}, Clause: {metadata.clause_number}, Point: {metadata.sub_clause_letter}")
174
  return metadata
175