Spaces:
Sleeping
Sleeping
update
Browse files- rag/qa_pipeline.py +8 -2
rag/qa_pipeline.py
CHANGED
|
@@ -185,6 +185,7 @@ def ask_ai_stream_delta(message: str, history: List, hybrid_retriever, cohort_ke
|
|
| 185 |
for doc in final_docs:
|
| 186 |
page = doc.metadata.get('page_number', 'N/A')
|
| 187 |
file_name = doc.metadata.get('source_file') or doc.metadata.get('source')
|
|
|
|
| 188 |
source = f"[{os.path.basename(file_name)} | Trang {page}]" if file_name else f"[Trang {page}]"
|
| 189 |
block = f"{source}\n{doc.page_content}"
|
| 190 |
|
|
@@ -196,6 +197,7 @@ def ask_ai_stream_delta(message: str, history: List, hybrid_retriever, cohort_ke
|
|
| 196 |
context_parts.append(block)
|
| 197 |
context_docs.append({
|
| 198 |
'source': file_name or "Không rõ",
|
|
|
|
| 199 |
'page': page
|
| 200 |
})
|
| 201 |
|
|
@@ -271,8 +273,12 @@ def ask_ai_stream_delta(message: str, history: List, hybrid_retriever, cohort_ke
|
|
| 271 |
yield "\n\n---\n\n"
|
| 272 |
yield "## 📚 Tài liệu tham khảo\n\n"
|
| 273 |
seen_sources = set()
|
| 274 |
-
for
|
| 275 |
source_key = f"{doc_info['source']}_{doc_info['page']}"
|
| 276 |
if source_key not in seen_sources:
|
| 277 |
seen_sources.add(source_key)
|
| 278 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
for doc in final_docs:
|
| 186 |
page = doc.metadata.get('page_number', 'N/A')
|
| 187 |
file_name = doc.metadata.get('source_file') or doc.metadata.get('source')
|
| 188 |
+
object_path = doc.metadata.get('source_relpath') or doc.metadata.get('object_path') or ''
|
| 189 |
source = f"[{os.path.basename(file_name)} | Trang {page}]" if file_name else f"[Trang {page}]"
|
| 190 |
block = f"{source}\n{doc.page_content}"
|
| 191 |
|
|
|
|
| 197 |
context_parts.append(block)
|
| 198 |
context_docs.append({
|
| 199 |
'source': file_name or "Không rõ",
|
| 200 |
+
'object_path': object_path,
|
| 201 |
'page': page
|
| 202 |
})
|
| 203 |
|
|
|
|
| 273 |
yield "\n\n---\n\n"
|
| 274 |
yield "## 📚 Tài liệu tham khảo\n\n"
|
| 275 |
seen_sources = set()
|
| 276 |
+
for doc_info in context_docs:
|
| 277 |
source_key = f"{doc_info['source']}_{doc_info['page']}"
|
| 278 |
if source_key not in seen_sources:
|
| 279 |
seen_sources.add(source_key)
|
| 280 |
+
object_path = str(doc_info.get('object_path') or '').strip()
|
| 281 |
+
if object_path:
|
| 282 |
+
yield f"- **{object_path}** (Trang {doc_info['page']})\n"
|
| 283 |
+
else:
|
| 284 |
+
yield f"- **{doc_info['source']}** (Trang {doc_info['page']})\n"
|