minh-4T committed on
Commit
30cfeef
·
1 Parent(s): aa80210
Files changed (1) hide show
  1. rag/qa_pipeline.py +8 -2
rag/qa_pipeline.py CHANGED
@@ -185,6 +185,7 @@ def ask_ai_stream_delta(message: str, history: List, hybrid_retriever, cohort_ke
185
  for doc in final_docs:
186
  page = doc.metadata.get('page_number', 'N/A')
187
  file_name = doc.metadata.get('source_file') or doc.metadata.get('source')
 
188
  source = f"[{os.path.basename(file_name)} | Trang {page}]" if file_name else f"[Trang {page}]"
189
  block = f"{source}\n{doc.page_content}"
190
 
@@ -196,6 +197,7 @@ def ask_ai_stream_delta(message: str, history: List, hybrid_retriever, cohort_ke
196
  context_parts.append(block)
197
  context_docs.append({
198
  'source': file_name or "Không rõ",
 
199
  'page': page
200
  })
201
 
@@ -271,8 +273,12 @@ def ask_ai_stream_delta(message: str, history: List, hybrid_retriever, cohort_ke
271
  yield "\n\n---\n\n"
272
  yield "## 📚 Tài liệu tham khảo\n\n"
273
  seen_sources = set()
274
- for i, doc_info in enumerate(context_docs, 1):
275
  source_key = f"{doc_info['source']}_{doc_info['page']}"
276
  if source_key not in seen_sources:
277
  seen_sources.add(source_key)
278
- yield f"- **{doc_info['source']}** (Trang {doc_info['page']})\n"
 
 
 
 
 
185
  for doc in final_docs:
186
  page = doc.metadata.get('page_number', 'N/A')
187
  file_name = doc.metadata.get('source_file') or doc.metadata.get('source')
188
+ object_path = doc.metadata.get('source_relpath') or doc.metadata.get('object_path') or ''
189
  source = f"[{os.path.basename(file_name)} | Trang {page}]" if file_name else f"[Trang {page}]"
190
  block = f"{source}\n{doc.page_content}"
191
 
 
197
  context_parts.append(block)
198
  context_docs.append({
199
  'source': file_name or "Không rõ",
200
+ 'object_path': object_path,
201
  'page': page
202
  })
203
 
 
273
  yield "\n\n---\n\n"
274
  yield "## 📚 Tài liệu tham khảo\n\n"
275
  seen_sources = set()
276
+ for doc_info in context_docs:
277
  source_key = f"{doc_info['source']}_{doc_info['page']}"
278
  if source_key not in seen_sources:
279
  seen_sources.add(source_key)
280
+ object_path = str(doc_info.get('object_path') or '').strip()
281
+ if object_path:
282
+ yield f"- **{object_path}** (Trang {doc_info['page']})\n"
283
+ else:
284
+ yield f"- **{doc_info['source']}** (Trang {doc_info['page']})\n"