Spaces:

ChienChung
/

SmartRAG_Multi-Agent_Assistant

Runtime error

App Files Files Community

ChienChung committed on Apr 6, 2025

Commit

4fd8be5

verified ·

1 Parent(s): 5aaf550

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -102

app.py CHANGED Viewed

@@ -1452,122 +1452,76 @@ def build_langgraph_pipeline():
 from tempfile import mkdtemp
-from pathlib import Path
-import logging
-logger = logging.getLogger(__name__)
 def get_file_path_tab6(file):
-    """Resolve an uploaded file (path string, dict with data/name, or object with save()) to a local path, or None."""
     try:
-        logger.debug(f"Processing file input: {file}")
-        # Input is None
         if file is None:
-            logger.warning("File input is None")
             return None
-        # Handle string input
         if isinstance(file, str):
-            # Remove newline / carriage-return characters and surrounding whitespace from the path
-            safe_path = file.replace('\n', '').replace('\r', '').strip()
-            # Check several candidate locations
-            potential_paths = [
-                safe_path,
-                os.path.join("/tmp/gradio/", safe_path),
-                os.path.join(os.getcwd(), safe_path)
             ]
-            for path in potential_paths:
                 if os.path.exists(path):
-                    logger.info(f"Found file at: {path}")
                     return path
-            logger.warning(f"Could not find file in any of these locations: {potential_paths}")
-            return None
-        # Handle dict input (per the original author, a common format for Gradio file uploads)
-        elif isinstance(file, dict):
-            logger.debug("Processing dictionary input")
-            # Get the file data and name
-            data = file.get("data")
-            name = file.get("name", "uploaded_file")
-            if data:
-                # Create a temporary directory
-                temp_dir = mkdtemp()
-                file_path = os.path.join(temp_dir, name)
-                # Write the file
-                try:
-                    with open(file_path, "wb") as f:
-                        if isinstance(data, str):
-                            f.write(data.encode("utf-8"))
-                        else:
-                            f.write(data)
-                    if os.path.exists(file_path):
-                        logger.info(f"Successfully created file at: {file_path}")
-                        return file_path
-                except Exception as e:
-                    logger.error(f"Error writing file: {e}")
-                    return None
-            logger.warning("No data found in file dictionary")
-            return None
-        # Handle objects that expose a save method (e.g. UploadedFile)
-        elif hasattr(file, "save"):
-            logger.debug("Processing file object with save method")
-            try:
-                temp_dir = mkdtemp()
-                file_name = getattr(file, "name", "uploaded_file")
-                file_path = os.path.join(temp_dir, file_name)
-                file.save(file_path)
-                if os.path.exists(file_path):
-                    logger.info(f"Successfully saved file to: {file_path}")
-                    return file_path
-            except Exception as e:
-                logger.error(f"Error saving file: {e}")
-                return None
-        # Handle any other case
-        else:
-            logger.warning(f"Unsupported file type: {type(file)}")
-            return None
     except Exception as e:
-        logger.error(f"Error in get_file_path_tab6: {e}")
         return None
 def langgraph_tab6_main(query: str, file=None):
     try:
         files = file if isinstance(file, list) else [file] if file else []
         all_docs = []
         file_names = []
         docs_by_file = []
         for f in files:
-            path = get_file_path_tab6(f)
-            if not path:
-                logger.warning(f"Could not process file: {f}")
-                continue
-            logger.info(f"Processing file: {path}")
             try:
                 if path.lower().endswith(".pdf"):
                     loader = PyPDFLoader(path)
                 elif path.lower().endswith(".docx"):
                     loader = UnstructuredWordDocumentLoader(path)
                 else:
                     loader = TextLoader(path)
                 docs = loader.load()
                 if docs:
                     file_names.append(os.path.basename(path))
@@ -1577,26 +1531,28 @@ def langgraph_tab6_main(query: str, file=None):
                         text = "\n".join(docs)
                     docs_by_file.append(text)
                     all_docs.extend(docs)
             except Exception as e:
-                logger.error(f"Error loading file {path}: {e}")
                 continue
-        # No file was processed successfully
         if not all_docs:
             return "No valid documents could be processed. Please check your file and try again."
-        else:
-            chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(all_docs)
-            db = FAISS.from_documents(chunks, embeddings)
-            retriever = db.as_retriever()
-            global session_retriever
-            session_retriever = retriever
-            global session_qa_chain
-            session_qa_chain = ConversationalRetrievalChain.from_llm(
-                llm=llm_gpt4,
-                retriever=retriever,
-                memory=ConversationBufferMemory(memory_key="chat_history", return_messages=True),
-            )
         parsed = parse_query(query)
         if (parsed.get("summarise") or parsed.get("compare")) and len(docs_by_file) > 0:
@@ -1607,14 +1563,16 @@ def langgraph_tab6_main(query: str, file=None):
         state = {"query": query, "file_names": file_names}
         if retriever is not None:
             state["retriever"] = retriever
         result = graph.invoke(state)
         if "answer" in result:
             return result["answer"]
         if "summary" in result:
             return result["summary"]
         return "No answer."
     except Exception as e:
-        logger.error(f"Error in main function: {e}")
         return f"[Tab6 Error] {str(e)}"
 # Gradio Interface Settings

 from tempfile import mkdtemp
 def get_file_path_tab6(file):
+    """Resolve a Gradio upload (object with a name attribute, dict with a 'name' key, or path string) to a path, or None."""
     try:
+        # Input is None
         if file is None:
             return None
+        # Handle Gradio file objects: anything exposing a `name` attribute is returned as file.name
+        if hasattr(file, 'name'):
+            return file.name
+        # Dict input (per the original author, another format Gradio file uploads can take)
+        if isinstance(file, dict):
+            if 'name' in file:
+                return file['name']
+            return None
+        # String path
         if isinstance(file, str):
+            # Check common upload locations. NOTE(review): os.path.exists also matches directories, so an empty string can resolve to a directory (e.g. the working directory) - confirm callers filter that out
+            possible_paths = [
+                file,
+                os.path.join('/tmp/gradio/', file),
+                os.path.join(os.getcwd(), file),
+                os.path.abspath(file)
             ]
+            for path in possible_paths:
                 if os.path.exists(path):
                     return path
+            # If the file is not found in any candidate location, return the original string unchanged
+            return file
+        return None
     except Exception as e:
+        print(f"Error in get_file_path_tab6: {e}")
         return None
 def langgraph_tab6_main(query: str, file=None):
     try:
+        # Normalise the file argument into a list and set up the accumulators
         files = file if isinstance(file, list) else [file] if file else []
         all_docs = []
         file_names = []
         docs_by_file = []
+        # Process each file
         for f in files:
             try:
+                # Get the file path
                 path = get_file_path_tab6(f)
+                if not path:
+                    print(f"Could not get valid path for file: {f}")
+                    continue
+                print(f"Attempting to process file: {path}")
+                # Choose a loader based on the file extension
                 if path.lower().endswith(".pdf"):
+                    from langchain.document_loaders import PyPDFLoader
                     loader = PyPDFLoader(path)
                 elif path.lower().endswith(".docx"):
+                    from langchain.document_loaders import UnstructuredWordDocumentLoader
                     loader = UnstructuredWordDocumentLoader(path)
                 else:
+                    from langchain.document_loaders import TextLoader
                     loader = TextLoader(path)
+                # Load the file
                 docs = loader.load()
                 if docs:
                     file_names.append(os.path.basename(path))
                         text = "\n".join(docs)
                     docs_by_file.append(text)
                     all_docs.extend(docs)
+                    print(f"Successfully processed file: {path}")
             except Exception as e:
+                print(f"Error processing file {f}: {e}")
                 continue
+        # Check whether any file was processed successfully
         if not all_docs:
             return "No valid documents could be processed. Please check your file and try again."
+        # The rest of the code is unchanged...
+        chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(all_docs)
+        db = FAISS.from_documents(chunks, embeddings)
+        retriever = db.as_retriever()
+        global session_retriever
+        session_retriever = retriever
+        global session_qa_chain
+        session_qa_chain = ConversationalRetrievalChain.from_llm(
+            llm=llm_gpt4,
+            retriever=retriever,
+            memory=ConversationBufferMemory(memory_key="chat_history", return_messages=True),
+        )
         parsed = parse_query(query)
         if (parsed.get("summarise") or parsed.get("compare")) and len(docs_by_file) > 0:
         state = {"query": query, "file_names": file_names}
         if retriever is not None:
             state["retriever"] = retriever
         result = graph.invoke(state)
         if "answer" in result:
             return result["answer"]
         if "summary" in result:
             return result["summary"]
         return "No answer."
     except Exception as e:
+        print(f"Error in main function: {e}")
         return f"[Tab6 Error] {str(e)}"
 # Gradio Interface Settings