Spaces:
Running
Running
| # api/courseware/rag.py | |
| """ | |
| Courseware RAG:统一检索并返回带引用的上下文。 | |
| - 本地 VDB(Weaviate / 上传文件 chunks):[Source: Filename/Page] | |
| - Web Search(可选):[Source: URL] | |
| """ | |
| from typing import List, Tuple, Optional | |
| from api.config import USE_WEAVIATE | |
| from api.weaviate_retrieve import retrieve_from_weaviate_with_refs, RefItem | |
| from api.courseware.references import format_references, append_references_to_content | |
def get_rag_context_with_refs(
    query: str,
    top_k: int = 8,
    web_search_urls: Optional[List[str]] = None,
    max_context_chars: int = 6000,
) -> Tuple[str, List[RefItem]]:
    """Return the RAG context text for ``query`` and the list of references.

    Weaviate is queried only when ``USE_WEAVIATE`` is truthy and the query
    has at least three characters after stripping whitespace. Web URLs, when
    given, are appended to the reference list as ``{"type": "web", "url": ...}``
    entries; they do not contribute to the context text.

    Args:
        query: The user query used for retrieval.
        top_k: Forwarded to ``retrieve_from_weaviate_with_refs`` as ``top_k``.
        web_search_urls: Optional URLs to add as web references. Blank or
            ``None`` entries are ignored; at most 10 non-blank URLs are kept.
        max_context_chars: Character budget for the context text. When the
            text is longer it is cut to this length and a ``"\\n..."`` marker
            is appended (the marker is added on top of the budget). A value
            ``<= 0`` disables truncation.

    Returns:
        A ``(context, refs)`` tuple: the stripped context text (possibly
        empty) and a new list of reference items.
    """
    web_ref_limit = 10  # upper bound on web references added per call

    text = ""
    refs: List[RefItem] = []

    if USE_WEAVIATE and query and len(query.strip()) >= 3:
        text, retrieved_refs = retrieve_from_weaviate_with_refs(query, top_k=top_k)
        # Copy the retriever's list so the appends below never mutate an
        # object the retriever may still hold (e.g. a cached result), and
        # tolerate a falsy refs value the same way falsy text is tolerated.
        refs = list(retrieved_refs or [])

    if text and max_context_chars > 0 and len(text) > max_context_chars:
        text = text[:max_context_chars] + "\n..."

    if web_search_urls:
        # Drop blank entries *before* applying the cap so that empty strings
        # or None values do not use up slots meant for real URLs.
        stripped_urls = [(url or "").strip() for url in web_search_urls]
        usable_urls = [url for url in stripped_urls if url]
        refs.extend(
            {"type": "web", "url": url} for url in usable_urls[:web_ref_limit]
        )

    return (text or "").strip(), refs
def inject_refs_instruction(refs: List[RefItem]) -> str:
    """Build the citation instruction that is passed to the LLM.

    When ``refs`` is non-empty, the returned text tells the model to end its
    answer with a "References" section and lists this request's sources as
    rendered by ``format_references``. When ``refs`` is empty, a short note
    is returned saying no reference material was retrieved.
    """
    if refs:
        instruction_header = (
            "回答末尾必须附「References」小节,按以下格式逐条列出所依据来源:\n"
            "本地资料使用 [Source: 文件名/页码],网络来源使用 [Source: URL]。\n"
            "本次参考来源:\n"
        )
        return instruction_header + format_references(refs)

    # Nothing was retrieved: let the model state that there are no citations.
    return "(本次无检索到参考资料,回答中可注明「无引用」。)"