Spaces:

ChienChung
/

SmartRAG_Multi-Agent_Assistant

Runtime error

App Files Files Community

ChienChung committed on Apr 2, 2025

Commit

e26601a

verified ·

1 Parent(s): 6296028

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -148

app.py CHANGED Viewed

@@ -4,7 +4,6 @@ import shutil
 import json
 import torch
 import transformers
-import chardet
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from transformers.models.llama.configuration_llama import LlamaConfig
 from huggingface_hub import hf_hub_download
@@ -45,11 +44,7 @@ from serpapi import GoogleSearch
 # CrewAI 部分：完全使用 CrewAI 的 Agent、Task、Crew 與 @tool 裝飾器
 from crewai import Crew, Agent, Task, Process
 from crewai.tools import tool
-from langchain_experimental.agents import create_pandas_dataframe_agent
-session_retriever = None
-session_qa_chain = None
-csv_dataframe = None  # CSV tool will use this
 # === Model and Device Setup ===
 if torch.backends.mps.is_available():
     device = "mps"
@@ -138,12 +133,7 @@ Answer:
 )
 llm_local = HuggingFacePipeline(pipeline=query_pipeline)
-llm_gpt4 = ChatOpenAI(model_name="gpt-4o-mini", temperature=0.2, openai_api_key=openai_api_key)
-crew_llm = ChatOpenAI(
-    model_name="gpt-4o-mini",
-    temperature=0.2,
-    openai_api_key=openai_api_key
-)
 memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
 qa_gpt = ConversationalRetrievalChain.from_llm(
@@ -244,16 +234,12 @@ def document_summarize(file):
     summary = summarize_chain.invoke(docs)
     return summary['output_text']
 def csv_agent(file, query):
     file_path = get_file_path(file)
     if file_path is None:
         return "Unable to obtain the uploaded CSV file."
     try:
-        with open(file_path, 'rb') as f:
-            result = chardet.detect(f.read())
-            encoding = result['encoding']
-        df = pd.read_csv(file_path, encoding=encoding)
     except Exception as e:
         return f"Error reading CSV: {e}"
     safe_dict = {"df": df, "pd": pd}
@@ -312,7 +298,7 @@ class SimpleQuery(BaseModel):
 @tool("summarise")
 def summarise_tool(query: str) -> str:
-    """Summarise: Use document summarisation functionality."""
     global session_retriever, session_qa_chain
     if session_retriever is None:
         return "尚未上傳文件。"
@@ -328,7 +314,7 @@ def summarise_tool(query: str) -> str:
 @tool("python_calc")
 def python_calc_tool(query: str) -> str:
-    """Python Calculation: Perform basic arithmetic or logical operations."""
     try:
         return str(eval(query))
     except Exception as e:
@@ -336,12 +322,12 @@ def python_calc_tool(query: str) -> str:
 @tool("search_agent")
 def search_tool_func(query: str) -> str:
-    """Search: Perform web searches using external search engines."""
     return search_agent(query)
 @tool("uploaded_qa")
 def uploaded_qa_tool_func(query: str) -> str:
-    """Document QA: Answer questions based on the uploaded document content."""
     global session_qa_chain
     if session_qa_chain is not None:
         try:
@@ -350,105 +336,52 @@ def uploaded_qa_tool_func(query: str) -> str:
             return f"文檔問答錯誤: {e}"
     else:
         return "尚未上傳文件。"
-@tool("csv_agent")
-def csv_tool_func(query: str) -> str:
-    """CSV Agent: Use natural language to analyse uploaded CSV files."""
-    global csv_dataframe
-    if csv_dataframe is None:
-        return "No CSV file uploaded."
-    try:
-        agent = create_pandas_dataframe_agent(llm=llm_gpt4, df=csv_dataframe, verbose=True)
-        return agent.run(f"Here is the table:\n{csv_dataframe.head().to_string(index=False)}\n\n{query}")
-    except Exception as e:
-        return f"CSV Agent error: {e}"
 # 建立 CrewAI 代理（僅針對 Tab 5）
 summarizer_agent = Agent(
-    role="Document Summarizer",
-    goal="Summarise the content of the uploaded document.",
-    backstory="You are a professional summarisation expert who can identify key points in long documents.",
     tools=[summarise_tool],
     verbose=True
 )
 document_qa_agent = Agent(
-    role="Document QA Specialist",
-    goal="Answer questions based on the uploaded document.",
-    backstory="You are an expert in document understanding and can accurately extract answers.",
     tools=[uploaded_qa_tool_func],
     verbose=True
 )
-search_agent = Agent(
-    role="Search Expert",
-    goal="Search the web and provide relevant information.",
-    backstory="You are an expert at finding relevant information from the internet.",
-    tools=[search_tool_func],
     verbose=True
 )
-math_agent = Agent(
-    role="Math Assistant",
-    goal="Perform accurate arithmetic or logical calculations.",
-    backstory="You are a calculator expert skilled at quick computations.",
-    tools=[python_calc_tool],
-    verbose=True
-)
-csv_agent = Agent(
-    role="CSV Analyst",
-    goal="Analyse tabular data and answer questions about the uploaded CSV file.",
-    backstory="You are skilled in interpreting tabular datasets and can extract numerical or logical insights.",
-    tools=[csv_tool_func],
-    verbose=True
-)
-router_agent = Agent(
-    role="Query Router",
-    goal="Determine the most suitable agent or tool to handle the user query.",
-    backstory="You are an intelligent query dispatcher that analyses the user's intent and chooses the best AI agent to answer.",
-    tools=[python_calc_tool, search_tool_func, csv_tool_func, uploaded_qa_tool_func, summarise_tool],
-    verbose=True
-)
 router_task = Task(
-    description=(
-        "Based on the user's query, decide which agent or tool is best suited to handle it:\n"
-        "- If the query is related to the content of an uploaded file (e.g., 'what is this document about?'), send it to the **Document QA Agent**.\n"
-        "- If the query contains words like 'summarize', 'summary', or 'main points', use the **Summarizer Agent**.\n"
-        "- If the query involves numbers, calculations, or logic (e.g., '50 * 23 - 5', 'what is 10% of 800'), send it to the **Math Agent**.\n"
-        "- If the user uploaded a CSV file and asks about table content, data trends, or uses words like 'data', 'table', 'csv', 'column', or 'row', send it to the **CSV Agent**.\n"
-        "- If the user asks about current events, trending topics, or online information (e.g., 'What is LangChain?', 'latest news'), send it to the **Search Agent**.\n"
-        "- If none of these apply, use your best judgment to choose the most relevant agent."
-    ),
-    expected_output="The final answer from the selected agent or tool.",
-    agent=router_agent,
-    input_variables=["query"]
 )
 crew = Crew(
-    agents=[summarizer_agent, document_qa_agent, search_agent, math_agent, csv_agent],
     tasks=[router_task],
     process=Process.sequential,
-    verbose=True,
-    llm=crew_llm
 )
 def multi_agent_chat(query: str) -> str:
-    print(f"Routing query: {query}")
     try:
-        result = crew.kickoff(inputs={"query": query})
-        result_str = str(result)
-        if "I don't know." in result_str or result_str.strip() == "":
-            return search_agent(query)  # fallback 給搜尋
-        step = result.steps[-1] if result and hasattr(result, "steps") else None
-        agent_name = step.agent.name if step else "Unknown"
-        output = step.output if step else str(result)
-        return f"[Agent: {agent_name}]\n{output}"
     except Exception as e:
         return f"Error: {e}"
 def multi_agent_chat_advanced(query: str, file=None) -> str:
     global session_retriever, session_qa_chain
-    # 判斷是否為與文件無關的查詢
     non_doc_keywords = ["calculate", "sum", "date", "time", "how many", "how much", "weather", "temperature"]
     use_file_chain = True
     for kw in non_doc_keywords:
@@ -460,31 +393,12 @@ def multi_agent_chat_advanced(query: str, file=None) -> str:
         file_path = get_file_path(file)
         if file_path is None:
             return "Unable to process the file format."
-        # === CSV 處理 ===
         if file_path.lower().endswith(".csv"):
-            global csv_dataframe
-            try:
-                with open(file_path, 'rb') as f:
-                    result = chardet.detect(f.read())
-                    encoding = result['encoding']
-                df = pd.read_csv(file_path, encoding=encoding)
-                csv_dataframe = df
-                result = crew.kickoff(inputs={"query": query})
-                step = result.steps[-1] if result and hasattr(result, "steps") else None
-                agent_name = step.agent.name if step else "Unknown"
-                output = step.output if step else str(result)
-                return f"[Agent: {agent_name}]\n{output}"
-            except Exception as e:
-                return f"Error reading CSV: {e}"
-        # === 文本類型文件（PDF / DOCX / TXT） ===
         elif file_path.lower().endswith((".pdf", ".txt", ".docx")):
-            loader = (
-                PyPDFLoader(file_path) if file_path.lower().endswith(".pdf")
-                else UnstructuredWordDocumentLoader(file_path) if file_path.lower().endswith(".docx")
-                else TextLoader(file_path)
-            )
             docs = loader.load()
             chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(docs)
             db = FAISS.from_documents(chunks, embeddings)
@@ -493,55 +407,28 @@ def multi_agent_chat_advanced(query: str, file=None) -> str:
                 llm=llm_gpt4,
                 retriever=session_retriever,
                 memory=ConversationBufferMemory(memory_key="chat_history", return_messages=True),
-                #combine_docs_chain_kwargs={"prompt": custom_prompt}
             )
-            # 決定使用摘要還是 QA
-            if any(kw in query.lower() for kw in ["summarize", "summary", "摘要", "總結"]):
-                return document_summarize(file_path)
-            elif use_file_chain:
-                try:
-                    return session_qa_chain.run(query)
-                except Exception as e:
-                    return f"Error: {e}"
             else:
                 try:
-                    result = crew.kickoff(inputs={"query": query})
-                    step = result.steps[-1] if result and hasattr(result, "steps") else None
-                    agent_name = step.agent.name if step else "Unknown"
-                    output = step.output if step else str(result)
-                    return f"[Agent: {agent_name}]\n{output}"
                 except Exception as e:
                     return f"Error: {e}"
         else:
             return "Unsupported file format."
-    # 沒有上傳新檔案
     elif session_qa_chain is not None:
         if use_file_chain:
-            try:
-                return session_qa_chain.run(query)
-            except Exception as e:
-                return f"Error: {e}"
         else:
             try:
-                result = crew.kickoff(inputs={"query": query})
-                step = result.steps[-1] if result and hasattr(result, "steps") else None
-                agent_name = step.agent.name if step else "Unknown"
-                output = step.output if step else str(result)
-                return f"[Agent: {agent_name}]\n{output}"
             except Exception as e:
                 return f"Error: {e}"
-    # 沒有 session，直接丟給 CrewAI
     else:
         try:
-            result = crew.kickoff(inputs={"query": query})
-            step = result.steps[-1] if result and hasattr(result, "steps") else None
-            agent_name = step.agent.name if step else "Unknown"
-            output = step.output if step else str(result)
-            return f"[Agent: {agent_name}]\n{output}"
         except Exception as e:
             return f"Error: {e}"

 import json
 import torch
 import transformers
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from transformers.models.llama.configuration_llama import LlamaConfig
 from huggingface_hub import hf_hub_download
 # CrewAI 部分：完全使用 CrewAI 的 Agent、Task、Crew 與 @tool 裝飾器
 from crewai import Crew, Agent, Task, Process
 from crewai.tools import tool
 # === Model and Device Setup ===
 if torch.backends.mps.is_available():
     device = "mps"
 )
 llm_local = HuggingFacePipeline(pipeline=query_pipeline)
+llm_gpt4 = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.2, openai_api_key=openai_api_key)
 memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
 qa_gpt = ConversationalRetrievalChain.from_llm(
     summary = summarize_chain.invoke(docs)
     return summary['output_text']
 def csv_agent(file, query):
     file_path = get_file_path(file)
     if file_path is None:
         return "Unable to obtain the uploaded CSV file."
     try:
+        df = pd.read_csv(file_path)
     except Exception as e:
         return f"Error reading CSV: {e}"
     safe_dict = {"df": df, "pd": pd}
 @tool("summarise")
 def summarise_tool(query: str) -> str:
+    """Summarise: 使用文件摘要功能。"""
     global session_retriever, session_qa_chain
     if session_retriever is None:
         return "尚未上傳文件。"
 @tool("python_calc")
 def python_calc_tool(query: str) -> str:
+    """Python Calculation: 執行簡單計算。"""
     try:
         return str(eval(query))
     except Exception as e:
 @tool("search_agent")
 def search_tool_func(query: str) -> str:
+    """Search: 執行網路搜尋。"""
     return search_agent(query)
 @tool("uploaded_qa")
 def uploaded_qa_tool_func(query: str) -> str:
+    """Document QA: 根據上傳文件回答問題。"""
     global session_qa_chain
     if session_qa_chain is not None:
         try:
             return f"文檔問答錯誤: {e}"
     else:
         return "尚未上傳文件。"
 # 建立 CrewAI 代理（僅針對 Tab 5）
 summarizer_agent = Agent(
+    role="文件摘要助手",
+    goal="對上傳文件內容進行摘要",
+    backstory="你是一位專業的摘要專家，能抓住長文的重點。",
     tools=[summarise_tool],
     verbose=True
 )
 document_qa_agent = Agent(
+    role="文件問答專家",
+    goal="根據上傳文件回答問題",
+    backstory="你精通文檔內容，能從中找出問題答案。",
     tools=[uploaded_qa_tool_func],
     verbose=True
 )
+general_agent = Agent(
+    role="綜合助手",
+    goal="回答一般問題，執行計算與網路搜尋",
+    backstory="你是一位多才多藝的AI助理，能根據需要使用工具。",
+    tools=[python_calc_tool, search_tool_func],
     verbose=True
 )
 router_task = Task(
+    description="根據使用者查詢自動決定使用哪個工具進行回答。",
+    expected_output="最終回答",
+    agent=general_agent
 )
 crew = Crew(
+    agents=[summarizer_agent, document_qa_agent, general_agent],
     tasks=[router_task],
     process=Process.sequential,
+    verbose=True
 )
 def multi_agent_chat(query: str) -> str:
     """Run the module-level ``crew`` on a user query and return the answer as text.

     Args:
         query: The user's question, passed to the crew as the ``query`` input.

     Returns:
         The crew's result converted to ``str``, or an ``"Error: ..."`` message
         if the crew raises.
     """
     try:
         # CrewAI crews are started with kickoff(); the previous crew.run(query)
         # call always raised and surfaced as an "Error: ..." string.
         # NOTE(review): kickoff inputs are interpolated into task/agent text via
         # "{query}" placeholders; confirm the task description contains one,
         # otherwise the query text will not reach the agent.
         result = crew.kickoff(inputs={"query": query})
         # kickoff() returns a result object rather than a plain string; convert
         # it so the declared ``-> str`` contract holds for the UI layer.
         return str(result)
     except Exception as e:
         # Top-level UI boundary: report the failure as text instead of crashing.
         return f"Error: {e}"
 def multi_agent_chat_advanced(query: str, file=None) -> str:
     global session_retriever, session_qa_chain
+    # 定義一些明顯與文件無關的關鍵字
     non_doc_keywords = ["calculate", "sum", "date", "time", "how many", "how much", "weather", "temperature"]
     use_file_chain = True
     for kw in non_doc_keywords:
         file_path = get_file_path(file)
         if file_path is None:
             return "Unable to process the file format."
         if file_path.lower().endswith(".csv"):
+            return csv_agent(file, query)
         elif file_path.lower().endswith((".pdf", ".txt", ".docx")):
+            loader = (PyPDFLoader(file_path) if file_path.lower().endswith(".pdf")
+                      else UnstructuredWordDocumentLoader(file_path) if file_path.lower().endswith(".docx")
+                      else TextLoader(file_path))
             docs = loader.load()
             chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(docs)
             db = FAISS.from_documents(chunks, embeddings)
                 llm=llm_gpt4,
                 retriever=session_retriever,
                 memory=ConversationBufferMemory(memory_key="chat_history", return_messages=True),
+                combine_docs_chain_kwargs={"prompt": custom_prompt}
             )
+            if use_file_chain:
+                return session_qa_chain.run(query)
             else:
                 try:
+                    return crew.run(query)
                 except Exception as e:
                     return f"Error: {e}"
         else:
             return "Unsupported file format."
     elif session_qa_chain is not None:
         if use_file_chain:
+            return session_qa_chain.run(query)
         else:
             try:
+                return crew.run(query)
             except Exception as e:
                 return f"Error: {e}"
     else:
         try:
+            return crew.run(query)
         except Exception as e:
             return f"Error: {e}"