claudqunwang's picture
feat(courseware): AI Teacher Assistant Agent 模块化
0cde401
# api/courseware/rag.py
"""
Courseware RAG:统一检索并返回带引用的上下文。
- 本地 VDB(Weaviate / 上传文件 chunks):[Source: Filename/Page]
- Web Search(可选):[Source: URL]
"""
from typing import List, Tuple, Optional
from api.config import USE_WEAVIATE
from api.weaviate_retrieve import retrieve_from_weaviate_with_refs, RefItem
from api.courseware.references import format_references, append_references_to_content
def get_rag_context_with_refs(
    query: str,
    top_k: int = 8,
    web_search_urls: Optional[List[str]] = None,
    max_context_chars: int = 6000,
    max_web_urls: int = 10,
) -> Tuple[str, List[RefItem]]:
    """Retrieve RAG context for *query* together with its citation list.

    Local retrieval runs only when ``USE_WEAVIATE`` is truthy and the stripped
    query has at least 3 characters; otherwise the context is empty. URLs in
    *web_search_urls* are not fetched here: they are only recorded as
    ``{"type": "web", "url": ...}`` references so they can be cited later.

    Args:
        query: Search query forwarded to ``retrieve_from_weaviate_with_refs``.
        top_k: Forwarded as ``top_k`` to ``retrieve_from_weaviate_with_refs``.
        web_search_urls: Optional web sources to record as references. Blank
            or ``None`` entries are skipped and do not count toward the cap.
        max_context_chars: When positive and the retrieved text is longer,
            the text is cut to this many characters and a newline plus
            ``...`` marker is appended. Non-positive disables truncation.
        max_web_urls: Maximum number of web references to add (default 10).
            Non-positive adds none.

    Returns:
        A ``(context, refs)`` tuple: the stripped context text intended for
        the prompt, and the references used for the final citations.
    """
    text = ""
    refs: List[RefItem] = []

    if USE_WEAVIATE and query and len(query.strip()) >= 3:
        text, retrieved_refs = retrieve_from_weaviate_with_refs(query, top_k=top_k)
        # Copy so that appending web refs below never mutates a list the
        # retriever may still own; `or []` tolerates a None result.
        refs = list(retrieved_refs or [])

    if text and 0 < max_context_chars < len(text):
        text = text[:max_context_chars] + "\n..."

    if web_search_urls and max_web_urls > 0:
        added = 0
        for raw_url in web_search_urls:
            url = (raw_url or "").strip()
            if not url:
                # Filter before counting: blank entries must not consume
                # the cap and push out valid URLs that follow them.
                continue
            refs.append({"type": "web", "url": url})
            added += 1
            if added >= max_web_urls:
                break

    return (text or "").strip(), refs
def inject_refs_instruction(refs: List[RefItem]) -> str:
    """Build the citation instruction handed to the LLM.

    With a non-empty *refs*, the result is a fixed instruction telling the
    model to end its answer with a "References" section (local material as
    ``[Source: filename/page]``, web sources as ``[Source: URL]``), followed
    by whatever ``format_references(refs)`` returns. With no references, a
    fixed note is returned saying nothing was retrieved and that the answer
    may state it has no citations.
    """
    if refs:
        instruction_header = (
            "回答末尾必须附「References」小节,按以下格式逐条列出所依据来源:\n"
            "本地资料使用 [Source: 文件名/页码],网络来源使用 [Source: URL]。\n"
            "本次参考来源:\n"
        )
        return instruction_header + format_references(refs)

    # Nothing retrieved: tell the model it may say so explicitly.
    return "(本次无检索到参考资料,回答中可注明「无引用」。)"