Spaces:

ChienChung
/

SmartRAG_Multi-Agent_Assistant

Runtime error

App Files Files Community

ChienChung committed on Apr 6, 2025

Commit

2ff6e79

verified ·

1 Parent(s): 4fd8be5

Update app.py

Browse files

Files changed (1) hide show

app.py +101 -92

app.py CHANGED Viewed

@@ -1452,128 +1452,137 @@ def build_langgraph_pipeline():
 from tempfile import mkdtemp
 def get_file_path_tab6(file):
-    """Improved file-path handling function, specifically for files uploaded through Gradio."""
-    try:
-        # If it is None
-        if file is None:
             return None
-        # Handle a Gradio file object
-        if hasattr(file, 'name'):
-            return file.name
-        # If it is a dict (another format Gradio file uploads can take)
-        if isinstance(file, dict):
-            if 'name' in file:
-                return file['name']
             return None
-        # If it is a string path
-        if isinstance(file, str):
-            # Check common upload locations
-            possible_paths = [
-                file,
-                os.path.join('/tmp/gradio/', file),
-                os.path.join(os.getcwd(), file),
-                os.path.abspath(file)
-            ]
-            for path in possible_paths:
-                if os.path.exists(path):
-                    return path
-            # If the file cannot be found, return the original path
-            return file
-        return None
-    except Exception as e:
-        print(f"Error in get_file_path_tab6: {e}")
         return None
 def langgraph_tab6_main(query: str, file=None):
     try:
-        # Initialise file handling
         files = file if isinstance(file, list) else [file] if file else []
-        all_docs = []
-        file_names = []
-        docs_by_file = []
-        # Process each file
         for f in files:
-            try:
-                # Get the file path
-                path = get_file_path_tab6(f)
-                if not path:
-                    print(f"Could not get valid path for file: {f}")
-                    continue
-                print(f"Attempting to process file: {path}")
-                # Choose a loader based on the file type
-                if path.lower().endswith(".pdf"):
-                    from langchain.document_loaders import PyPDFLoader
-                    loader = PyPDFLoader(path)
-                elif path.lower().endswith(".docx"):
-                    from langchain.document_loaders import UnstructuredWordDocumentLoader
-                    loader = UnstructuredWordDocumentLoader(path)
-                else:
-                    from langchain.document_loaders import TextLoader
-                    loader = TextLoader(path)
-                # Load the file
-                docs = loader.load()
-                if docs:
-                    file_names.append(os.path.basename(path))
-                    if hasattr(docs[0], "page_content"):
-                        text = "\n".join([doc.page_content for doc in docs])
-                    else:
-                        text = "\n".join(docs)
-                    docs_by_file.append(text)
-                    all_docs.extend(docs)
-                    print(f"Successfully processed file: {path}")
-            except Exception as e:
-                print(f"Error processing file {f}: {e}")
                 continue
-        # Check whether any file was processed successfully
         if not all_docs:
-            return "No valid documents could be processed. Please check your file and try again."
-        # The rest of the code is unchanged...
-        chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(all_docs)
-        db = FAISS.from_documents(chunks, embeddings)
-        retriever = db.as_retriever()
-        global session_retriever
-        session_retriever = retriever
-        global session_qa_chain
-        session_qa_chain = ConversationalRetrievalChain.from_llm(
-            llm=llm_gpt4,
-            retriever=retriever,
-            memory=ConversationBufferMemory(memory_key="chat_history", return_messages=True),
-        )
         parsed = parse_query(query)
         if (parsed.get("summarise") or parsed.get("compare")) and len(docs_by_file) > 0:
             final_answer = execute_multi_agent(parsed, docs_by_file, file_names)
             return final_answer
         graph = build_langgraph_pipeline()
         state = {"query": query, "file_names": file_names}
         if retriever is not None:
             state["retriever"] = retriever
         result = graph.invoke(state)
         if "answer" in result:
             return result["answer"]
         if "summary" in result:
             return result["summary"]
         return "No answer."
     except Exception as e:
-        print(f"Error in main function: {e}")
-        return f"[Tab6 Error] {str(e)}"
 # Gradio Interface Settings
 demo_description = """

 from tempfile import mkdtemp
+from tempfile import mkdtemp
+import os
 def get_file_path_tab6(file):
+    # DEBUG: print the file object that was received
+    print("DEBUG: Received file object:", file)
+    # If a string was passed in, confirm it is a path to an existing file
+    if isinstance(file, str):
+        if os.path.exists(file):
+            return file
+        else:
+            print("DEBUG: String file path does not exist:", file)
             return None
+    # If a dict was passed in (a common format after a Gradio upload)
+    elif isinstance(file, dict):
+        data = file.get("data")
+        name = file.get("name")
+        print("DEBUG: File dict - name:", name, "data type:", type(data))
+        if data:
+            # If data is a string and that path exists, return it directly
+            if isinstance(data, str) and os.path.exists(data):
+                return data
+            else:
+                # Write data to a temporary file. NOTE(review): name is joined unsanitised, so a name with directory parts or an absolute path lands outside temp_dir - confirm callers only pass bare file names
+                temp_dir = mkdtemp()
+                file_path = os.path.join(temp_dir, name if name else "uploaded_file")
+                with open(file_path, "wb") as f:
+                    if isinstance(data, str):
+                        f.write(data.encode("utf-8"))
+                    else:
+                        f.write(data)
+                # Check that the file was written successfully
+                if os.path.exists(file_path):
+                    print("DEBUG: File successfully written to:", file_path)
+                    return file_path
+                else:
+                    print("DEBUG: File not created at:", file_path)
+                    return None
+        else:
+            # If there is no data, return None rather than an invalid file name
+            print("DEBUG: No data field in file dict")
             return None
+    # If the object has a .save attribute, call save and return the file path
+    elif hasattr(file, "save"):
+        temp_dir = mkdtemp()
+        file_path = os.path.join(temp_dir, file.name)
+        file.save(file_path)
+        if os.path.exists(file_path):
+            print("DEBUG: File saved at:", file_path)
+            return file_path
+        else:
+            print("DEBUG: File not saved at:", file_path)
+            return None
+    else:
+        # If file has a .name attribute, try returning it (only when that path exists)
+        if hasattr(file, "name"):
+            if os.path.exists(file.name):
+                return file.name
+        print("DEBUG: File type not recognized.")
         return None
 def langgraph_tab6_main(query: str, file=None):
     try:
+        # Get the list of uploaded files (multi-file mode)
         files = file if isinstance(file, list) else [file] if file else []
+        all_docs = []        # used to build the retriever
+        file_names = []      # list of file names
+        docs_by_file = []    # full text of each document
         for f in files:
+            path = get_file_path_tab6(f)  # use the new get_file_path_tab6
+            if not path:
+                print("DEBUG: get_file_path_tab6 returned None for file:", f)
+                continue
+            if not os.path.exists(path):
+                print("DEBUG: Returned path does not exist:", path)
                 continue
+            file_names.append(os.path.basename(path))
+            print("DEBUG: Processing file:", path)
+            if path.lower().endswith(".pdf"):
+                loader = PyPDFLoader(path)
+            elif path.lower().endswith(".docx"):
+                loader = UnstructuredWordDocumentLoader(path)
+            else:
+                loader = TextLoader(path)
+            docs = loader.load()
+            print("DEBUG: Docs loaded from", path, ":", docs)
+            if docs and hasattr(docs[0], "page_content"):
+                text = "\n".join([doc.page_content for doc in docs])
+            else:
+                text = "\n".join(docs)
+            docs_by_file.append(text)
+            all_docs.extend(docs)
+        # Build the retriever (used by the non-multi-agent flow)
         if not all_docs:
+            print("DEBUG: No valid document content read. file_names:", file_names)
+            retriever = None
+        else:
+            chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(all_docs)
+            db = FAISS.from_documents(chunks, embeddings)
+            retriever = db.as_retriever()
+            global session_retriever
+            session_retriever = retriever
+            global session_qa_chain
+            session_qa_chain = ConversationalRetrievalChain.from_llm(
+                llm=llm_gpt4,
+                retriever=retriever,
+                memory=ConversationBufferMemory(memory_key="chat_history", return_messages=True),
+            )
+        # Parse the query and break it into sub-intents
         parsed = parse_query(query)
         if (parsed.get("summarise") or parsed.get("compare")) and len(docs_by_file) > 0:
             final_answer = execute_multi_agent(parsed, docs_by_file, file_names)
             return final_answer
+        # Otherwise, go through the original LangGraph pipeline
         graph = build_langgraph_pipeline()
         state = {"query": query, "file_names": file_names}
         if retriever is not None:
             state["retriever"] = retriever
         result = graph.invoke(state)
         if "answer" in result:
             return result["answer"]
         if "summary" in result:
             return result["summary"]
         return "No answer."
     except Exception as e:
+        return f"[Tab6 Error] {e}"
 # Gradio Interface Settings
 demo_description = """