Spaces:

ChienChung
/

SmartRAG_Multi-Agent_Assistant

Runtime error

App Files Files Community

ChienChung committed on Apr 7, 2025

Commit

6c70f02

verified ·

1 Parent(s): a4b0a13

Update app.py

Browse files

Files changed (1) hide show

app.py +172 -138

app.py CHANGED Viewed

@@ -1282,6 +1282,120 @@ def parse_query(query: str) -> dict:
             "find_relations": False
         }
 def compare_documents(doc1: str, doc2: str, file1_name: str, file2_name: str) -> str:
     try:
         prompt = f"""Compare the following two documents and identify their main differences:
@@ -1323,73 +1437,10 @@ def find_document_relations(docs: list, file_names: list) -> str:
     response = llm_gpt4.invoke(prompt)
     return response.content
-def execute_multi_agent(parsed: dict, docs: list, file_names: list) -> str:
-    try:
-        results = {}
-        # 處理直接的比較請求
-        if len(docs) == 2:
-            comparison = compare_documents(
-                docs[0], docs[1],
-                file_names[0], file_names[1]
-            )
-            results["comparisons"] = comparison
-        # 處理其他分析
-        if parsed.get("summarize_files"):
-            summaries = []
-            for idx in parsed["summarize_files"]:
-                if idx < len(docs):
-                    summary = document_summarize(docs[idx])
-                    summaries.append(f"Document {file_names[idx]} summary:\n{summary}")
-            results["summaries"] = "\n\n".join(summaries)
-        if parsed.get("find_relations"):
-            results["relations"] = find_document_relations(docs, file_names)
-        # 融合結果
-        fusion_prompt = f"""Based on the following analysis results, provide a comprehensive answer:
-        Comparison Results:
-        {results.get('comparisons', 'No comparison performed')}
-        Summary Results:
-        {results.get('summaries', 'No summaries generated')}
-        Relationship Analysis:
-        {results.get('relations', 'No relationship analysis performed')}
-        Please provide a coherent response that covers all important findings.
-        """
-        final_response = llm_gpt4.invoke(fusion_prompt)
-        return final_response.content if hasattr(final_response, 'content') else str(final_response)
-    except Exception as e:
-        print(f"ERROR in execute_multi_agent: {str(e)}")
-        return f"Error occurred while processing the documents: {str(e)}"
 # === AutoGen 多代理人協作邏輯 ===
-def autogen_multi_intent_agent(query: str, docs: list) -> str:
-    try:
-        context = "\n\n".join(d.page_content for d in docs[:10])
-        system_prompt = f"""You are a helpful assistant. Your task is to answer the following user question using two strategies:
-1. Use context-based question answering based on the document below.
-2. Also generate a short summary of the document, in case that helps interpret the question.
-Document Context:
-{context}
-"""
-        user_proxy = UserProxyAgent(name="User", is_termination_msg=lambda x: True, human_input_mode="NEVER")
-        qa_agent = AssistantAgent(name="QA_Agent", system_message="You are great at document-based QA.")
-        sum_agent = AssistantAgent(name="Summary_Agent", system_message="You are great at summarising text.")
-        group_chat = GroupChat(agents=[user_proxy, qa_agent, sum_agent], messages=[], max_round=3)
-        manager = GroupChatManager(groupchat=group_chat, llm_config={"config_list": [{"model": "gpt-4o", "api_key": openai_api_key}]})
-        user_proxy.initiate_chat(manager, message=query)
-        return user_proxy.last_message()["content"]
-    except Exception as e:
-        return f"[AutoGen Error] {e}"
 def detect_intent_embedding(query, file_names=[]):
     query_emb = embedding_model.encode(query, normalize_embeddings=True)
@@ -1443,18 +1494,6 @@ search_task = Task(
 )
 # === LangGraph 節點函數 ===
-def docqa_run(state):
-    result = document_qa_agent.execute_task(docqa_task, {"query": state["query"]})
-    if isinstance(result, str):
-        output = result.lower()
-    else:
-        output = result.output.lower()
-    if any(x in output for x in ["no relevant info", "not found", "no answer"]):
-        return general_run(state)
-    if isinstance(result, str):
-        return {"answer": result}
-    else:
-        return {"answer": result.output}
 def general_run(state):
     """改用直接 LLM 回答取代 General Agent"""
@@ -1471,12 +1510,49 @@ def general_run(state):
         print(f"ERROR in general_run: {str(e)}")
         return {"answer": "I apologize, but I'm having trouble processing your request."}
 def summariser_run(state):
-    result = summarizer_agent.execute_task(summariser_task, {"query": state["query"]})
-    if isinstance(result, str):
-        return {"summary": result}
-    else:
-        return {"summary": result.output}
 # === LangGraph 定義 ===
 def build_langgraph_pipeline():
@@ -1566,6 +1642,7 @@ def langgraph_tab6_main(query: str, file=None):
         file_names = []
         docs_by_file = []
         for f in files:
             try:
                 path = get_file_path_tab6(f)
@@ -1618,14 +1695,11 @@ def langgraph_tab6_main(query: str, file=None):
             print(f"ERROR setting up retriever: {str(e)}")
             retriever = None
-        # 解析查詢意圖
-        parsed = parse_query(query)
-        # 如果是複雜查詢（比較、關聯分析等），使用 execute_multi_agent
-        if needs_multi_agent_processing(query, parsed, docs_by_file):
-            return execute_multi_agent(parsed, docs_by_file, file_names)
-        # 使用 LangGraph 處理一般查詢
         state = {
             "query": query,
             "file_names": file_names,
@@ -1633,22 +1707,14 @@ def langgraph_tab6_main(query: str, file=None):
             "retriever": retriever
         }
-        graph = build_langgraph_pipeline()
-        result = graph.invoke(state)
-        # 處理結果
-        if isinstance(result, dict):
-            if "answer" in result:
-                return result["answer"]
-            elif "summary" in result:
-                return result["summary"]
-            elif session_qa_chain:
-                try:
-                    return session_qa_chain.run(query)
-                except Exception as e:
-                    print(f"ERROR in QA chain: {str(e)}")
-        return "I apologize, but I couldn't process your query properly."
     except Exception as e:
         print(f"ERROR in main function: {str(e)}")
@@ -1656,39 +1722,7 @@ def langgraph_tab6_main(query: str, file=None):
-def needs_multi_agent_processing(query: str, parsed: dict, docs: list) -> bool:
-    """判斷是否需要多代理處理"""
-    # 檢查是否為比較或關聯分析查詢
-    comparison_keywords = ["compare", "difference", "differences", "between", "similar", "similarity"]
-    is_comparison_query = any(keyword in query.lower() for keyword in comparison_keywords)
-    # 如果是比較查詢且有多個文件，直接返回 True
-    if is_comparison_query and len(docs) > 1:
-        return True
-    return any([
-        parsed.get("summarize_files"),
-        parsed.get("compare_files"),
-        parsed.get("find_relations"),
-        len(docs) > 1 and any(x in query.lower() for x in [
-            "both", "relation", "project", "connection"
-        ])
-    ])
-def process_result(result: dict, query: str) -> str:
-    """處理查詢結果"""
-    if isinstance(result, dict):
-        if "answer" in result:
-            return result["answer"]
-        elif "summary" in result:
-            return result["summary"]
-        elif session_qa_chain:
-            try:
-                return session_qa_chain.run(query)
-            except Exception as e:
-                print(f"ERROR in QA chain: {str(e)}")
-    return "I apologize, but I couldn't find a relevant answer in the documents."
 # Gradio Interface Settings
 demo_description = """

             "find_relations": False
         }
+def analyze_documents(query: str, docs: list, file_names: list) -> str:
+    """通用文件分析函數，能處理各種類型的查詢"""
+    try:
+        # 準備文件上下文
+        context = "\n\n".join(
+            f"Document {name}:\n{doc[:2000]}..."
+            for name, doc in zip(file_names, docs)
+        )
+        # 構建更通用的提示詞
+        prompt = f"""Analyze the following documents and answer the query.
+        Query: {query}
+        Documents:
+        {context}
+        Instructions:
+        1. Understand the specific requirements of the query
+        2. Analyze the relevant parts of the documents
+        3. Consider all possible relationships and connections
+        4. Provide a direct and specific answer
+        5. Support your answer with evidence from the documents
+        Important:
+        - If the query asks about specific content, find and quote relevant parts
+        - If comparing documents, identify relevant similarities and differences
+        - If looking for relationships, analyze all possible connections
+        - Always provide evidence for your conclusions
+        - Be precise and direct in your answer
+        Format your response to directly address the query while providing supporting evidence.
+        """
+        response = llm_gpt4.invoke(prompt)
+        return response.content if hasattr(response, 'content') else str(response)
+    except Exception as e:
+        print(f"ERROR in analyze_documents: {str(e)}")
+        return f"Error analyzing documents: {str(e)}"
+def langgraph_tab6_main(query: str, file=None):
+    try:
+        print(f"DEBUG: Starting processing with query: {query}")
+        # 如果沒有文件，直接使用 general_run
+        if not file:
+            return general_run({"query": query})["answer"]
+        # 處理文件列表
+        files = file if isinstance(file, list) else [file]
+        all_docs = []
+        file_names = []
+        docs_by_file = []
+        # 處理上傳的文件
+        for f in files:
+            try:
+                path = get_file_path_tab6(f)
+                if not path:
+                    continue
+                file_names.append(os.path.basename(path))
+                # 根據文件類型選擇加載器
+                if path.lower().endswith('.pdf'):
+                    loader = PyPDFLoader(path)
+                elif path.lower().endswith('.docx'):
+                    loader = UnstructuredWordDocumentLoader(path)
+                else:
+                    loader = TextLoader(path)
+                docs = loader.load()
+                if docs:
+                    text = "\n".join(doc.page_content for doc in docs if hasattr(doc, 'page_content'))
+                    docs_by_file.append(text)
+                    all_docs.extend(docs)
+            except Exception as e:
+                print(f"ERROR processing file: {str(e)}")
+                continue
+        if not docs_by_file:
+            return general_run({"query": query})["answer"]
+        # 建立檢索器
+        try:
+            chunks = RecursiveCharacterTextSplitter(
+                chunk_size=500,
+                chunk_overlap=50
+            ).split_documents(all_docs)
+            db = FAISS.from_documents(chunks, embeddings)
+            retriever = db.as_retriever(search_kwargs={"k": 5})
+            global session_retriever, session_qa_chain
+            session_retriever = retriever
+            session_qa_chain = ConversationalRetrievalChain.from_llm(
+                llm=llm_gpt4,
+                retriever=retriever,
+                memory=ConversationBufferMemory(
+                    memory_key="chat_history",
+                    return_messages=True
+                ),
+            )
+        except Exception as e:
+            print(f"ERROR setting up retriever: {str(e)}")
+            retriever = None
+        # 使用通用分析函數處理查詢
+        return analyze_documents(query, docs_by_file, file_names)
+    except Exception as e:
+        print(f"ERROR in main function: {str(e)}")
+        return f"I apologize, but I encountered an error: {str(e)}"
 def compare_documents(doc1: str, doc2: str, file1_name: str, file2_name: str) -> str:
     try:
         prompt = f"""Compare the following two documents and identify their main differences:
     response = llm_gpt4.invoke(prompt)
     return response.content
 # === AutoGen 多代理人協作邏輯 ===
 def detect_intent_embedding(query, file_names=[]):
     query_emb = embedding_model.encode(query, normalize_embeddings=True)
 )
 # === LangGraph 節點函數 ===
 def general_run(state):
     """改用直接 LLM 回答取代 General Agent"""
         print(f"ERROR in general_run: {str(e)}")
         return {"answer": "I apologize, but I'm having trouble processing your request."}
+def docqa_run(state):
+    """文件問答處理"""
+    try:
+        # 如果有檢索器，使用檢索器
+        if "retriever" in state:
+            relevant_docs = state["retriever"].get_relevant_documents(state["query"])
+            context = "\n".join(d.page_content for d in relevant_docs)
+        else:
+            context = "\n".join(state["docs"])
+        prompt = f"""Based on the following context, please answer the question:
+        Question: {state["query"]}
+        Context:
+        {context[:3000]}
+        Provide a detailed and accurate answer based on the context."""
+        response = llm_gpt4.invoke(prompt)
+        return {"answer": response.content if hasattr(response, 'content') else str(response)}
+    except Exception as e:
+        print(f"ERROR in docqa_run: {str(e)}")
+        return general_run(state)
 def summariser_run(state):
+    """文件摘要處理"""
+    try:
+        context = "\n".join(state["docs"])
+        prompt = f"""Please provide a comprehensive summary of the following document:
+        {context[:3000]}
+        Focus on:
+        1. Main topics and key points
+        2. Important findings or conclusions
+        3. Significant details"""
+        response = llm_gpt4.invoke(prompt)
+        return {"summary": response.content if hasattr(response, 'content') else str(response)}
+    except Exception as e:
+        print(f"ERROR in summariser_run: {str(e)}")
+        return {"summary": "Error generating summary."}
 # === LangGraph 定義 ===
 def build_langgraph_pipeline():
         file_names = []
         docs_by_file = []
+        # 處理上傳的文件
         for f in files:
             try:
                 path = get_file_path_tab6(f)
             print(f"ERROR setting up retriever: {str(e)}")
             retriever = None
+        # 檢測是否為多文件查詢
+        if len(docs_by_file) > 1:
+            return analyze_documents(query, docs_by_file, file_names)
+        # 使用 LangGraph 處理單文件查詢
         state = {
             "query": query,
             "file_names": file_names,
             "retriever": retriever
         }
+        # 根據查詢意圖選擇處理方式
+        intent = detect_intent_embedding(query, file_names)
+        if intent == "Summarise":
+            return summariser_run(state)["summary"]
+        elif intent == "DocQA":
+            return docqa_run(state)["answer"]
+        else:
+            return general_run(state)["answer"]
     except Exception as e:
         print(f"ERROR in main function: {str(e)}")
 # Gradio Interface Settings
 demo_description = """