test_AI_Agent

Sleeping

SarahXia0405 committed on Dec 5, 2025

Commit

e6c9deb

verified ·

1 Parent(s): 9f31b21

Update rag_engine.py

Files changed (1) hide show

rag_engine.py CHANGED Viewed

@@ -9,18 +9,15 @@ from clare_core import (
 )
 from langsmith import traceable
 from langsmith.run_helpers import set_run_metadata
 def build_rag_chunks_from_file(file, doc_type_val: str) -> List[Dict]:
     """
     从上传的文件构建 RAG chunk 列表（session 级别）。
-    - 支持 .docx 和 .pdf
     - 复用 syllabus_utils 里的解析函数，把文档切成一系列文本块
     - 对每个非空文本块做 embedding，存成 {"text": str, "embedding": List[float]}
-    当前 doc_type_val 主要用于未来扩展（不同类型文件可采用不同切块策略），
-    这里先不区分，统一按段落/块处理。
     """
     if file is None:
         return []
@@ -32,11 +29,13 @@ def build_rag_chunks_from_file(file, doc_type_val: str) -> List[Dict]:
     ext = os.path.splitext(file_path)[1].lower()
     try:
-        # 1) 解析文件 → 得到一组文本块
         if ext == ".docx":
             texts = parse_syllabus_docx(file_path)
         elif ext == ".pdf":
             texts = parse_syllabus_pdf(file_path)
         else:
             print(f"[RAG] unsupported file type for RAG: {ext}")
             return []

 )
 from langsmith import traceable
 from langsmith.run_helpers import set_run_metadata
+from syllabus_utils import parse_syllabus_docx, parse_syllabus_pdf, parse_pptx_slides
 def build_rag_chunks_from_file(file, doc_type_val: str) -> List[Dict]:
     """
     从上传的文件构建 RAG chunk 列表（session 级别）。
+    - 支持 .docx / .pdf / .pptx
     - 复用 syllabus_utils 里的解析函数，把文档切成一系列文本块
     - 对每个非空文本块做 embedding，存成 {"text": str, "embedding": List[float]}
     """
     if file is None:
         return []
     ext = os.path.splitext(file_path)[1].lower()
     try:
+        # 1) 解析文件 → 文本块列表
         if ext == ".docx":
             texts = parse_syllabus_docx(file_path)
         elif ext == ".pdf":
             texts = parse_syllabus_pdf(file_path)
+        elif ext == ".pptx":
+            texts = parse_pptx_slides(file_path)
         else:
             print(f"[RAG] unsupported file type for RAG: {ext}")
             return []