ChienChung committed on
Commit
f14892a
·
verified ·
1 Parent(s): 054fb4f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -147
app.py CHANGED
@@ -1466,21 +1466,19 @@ def docqa_run(state):
1466
  return {"answer": result.output}
1467
 
1468
  def general_run(state):
1469
- result = general_agent.execute_task(general_task, {"query": state["query"]})
1470
- if isinstance(result, str):
1471
- output = result.lower()
1472
- else:
1473
- output = result.output.lower()
1474
- if any(x in output for x in ["i don't know", "no idea", "not sure", "can't answer"]):
1475
- result = search_agent.execute_task(search_task, {"query": state["query"]})
1476
- if isinstance(result, str):
1477
- return {"answer": result}
1478
- else:
1479
- return {"answer": result.output}
1480
- if isinstance(result, str):
1481
- return {"answer": result}
1482
- else:
1483
- return {"answer": result.output}
1484
 
1485
  def summariser_run(state):
1486
  result = summarizer_agent.execute_task(summariser_task, {"query": state["query"]})
@@ -1509,13 +1507,6 @@ def build_langgraph_pipeline():
1509
 
1510
  from tempfile import mkdtemp
1511
 
1512
-
1513
- from tempfile import mkdtemp
1514
- import os
1515
-
1516
- from tempfile import mkdtemp
1517
- import os
1518
-
1519
  def get_file_path_tab6(file):
1520
  if isinstance(file, str):
1521
  print("DEBUG: File is a string:", file)
@@ -1573,60 +1564,46 @@ def get_file_path_tab6(file):
1573
  def langgraph_tab6_main(query: str, file=None):
1574
  try:
1575
  print(f"DEBUG: Starting processing with query: {query}")
1576
- print(f"DEBUG: File input type: {type(file)}")
 
 
 
1577
 
1578
  # 處理文件列表
1579
- files = file if isinstance(file, list) else [file] if file else []
1580
- all_docs = [] # 用於建立檢索器
1581
- file_names = [] # 檔案名稱列表
1582
- docs_by_file = [] # 每份文件的完整內文
1583
 
1584
- # 處理上傳的文件
1585
  for f in files:
1586
  try:
1587
  path = get_file_path_tab6(f)
1588
  if not path:
1589
- print(f"WARNING: Could not process file {f}, skipping")
1590
  continue
1591
-
1592
- print(f"DEBUG: Successfully got file path: {path}")
1593
  file_names.append(os.path.basename(path))
1594
 
1595
- # 根據文件類型選擇適當的加載器
1596
  if path.lower().endswith('.pdf'):
1597
  loader = PyPDFLoader(path)
1598
  elif path.lower().endswith('.docx'):
1599
  loader = UnstructuredWordDocumentLoader(path)
1600
  else:
1601
  loader = TextLoader(path)
1602
-
1603
- print(f"DEBUG: Using loader: {type(loader)}")
1604
 
1605
- try:
1606
- docs = loader.load()
1607
- print(f"DEBUG: Successfully loaded document, got {len(docs)} pages/chunks")
1608
-
1609
- # 提取文件內容
1610
- if docs and hasattr(docs[0], "page_content"):
1611
- text = "\n".join([doc.page_content for doc in docs])
1612
- else:
1613
- text = "\n".join(docs)
1614
-
1615
  docs_by_file.append(text)
1616
  all_docs.extend(docs)
1617
-
1618
- except Exception as e:
1619
- print(f"ERROR loading document: {str(e)}")
1620
- continue
1621
-
1622
  except Exception as e:
1623
  print(f"ERROR processing file: {str(e)}")
1624
  continue
1625
 
1626
- if not all_docs:
1627
- return "No valid documents were processed. Please check your file upload."
1628
 
1629
- # 建立檢索器和問答鏈
1630
  try:
1631
  chunks = RecursiveCharacterTextSplitter(
1632
  chunk_size=500,
@@ -1634,10 +1611,7 @@ def langgraph_tab6_main(query: str, file=None):
1634
  ).split_documents(all_docs)
1635
 
1636
  db = FAISS.from_documents(chunks, embeddings)
1637
- retriever = db.as_retriever(
1638
- search_type="similarity",
1639
- search_kwargs={"k": 5}
1640
- )
1641
 
1642
  global session_retriever, session_qa_chain
1643
  session_retriever = retriever
@@ -1655,16 +1629,12 @@ def langgraph_tab6_main(query: str, file=None):
1655
 
1656
  # 解析查詢意圖
1657
  parsed = parse_query(query)
1658
- print(f"DEBUG: Parsed query: {parsed}")
1659
-
1660
- # 檢測是否需要使用 AutoGen 多代理協作
1661
  if needs_multi_agent_processing(query, parsed, docs_by_file):
1662
- print("DEBUG: Using AutoGen for complex query")
1663
- return handle_complex_query_with_autogen(query, docs_by_file, file_names)
1664
 
1665
  # 使用 LangGraph 處理一般查詢
1666
- print("DEBUG: Using LangGraph for standard query")
1667
- graph = build_langgraph_pipeline()
1668
  state = {
1669
  "query": query,
1670
  "file_names": file_names,
@@ -1672,12 +1642,26 @@ def langgraph_tab6_main(query: str, file=None):
1672
  "retriever": retriever
1673
  }
1674
 
 
1675
  result = graph.invoke(state)
1676
- return process_result(result, query)
 
 
 
 
 
 
 
 
 
 
 
 
 
1677
 
1678
  except Exception as e:
1679
  print(f"ERROR in main function: {str(e)}")
1680
- return f"[Tab6 Error] {str(e)}"
1681
 
1682
  def needs_multi_agent_processing(query: str, parsed: dict, docs: list) -> bool:
1683
  return any([
@@ -1694,88 +1678,17 @@ def needs_multi_agent_processing(query: str, parsed: dict, docs: list) -> bool:
1694
  ])
1695
  ])
1696
 
1697
- def handle_complex_query_with_autogen(query: str, docs: list, file_names: list) -> str:
1698
- """使用 AutoGen 處理複雜查詢"""
1699
- try:
1700
- # 準備文件上下文
1701
- context = "\n\n".join(
1702
- f"Document {name}:\n{doc[:2000]}..."
1703
- for name, doc in zip(file_names, docs)
1704
- )
1705
-
1706
- # 定義專門的代理人
1707
- qa_agent = AssistantAgent(
1708
- name="QA_Expert",
1709
- system_message="You are an expert at analyzing documents and answering questions.",
1710
- llm_config={"config_list": [{"model": "gpt-4", "api_key": openai_api_key}]}
1711
- )
1712
-
1713
- summarizer_agent = AssistantAgent(
1714
- name="Summarizer",
1715
- system_message="You are an expert at summarizing documents and identifying key points.",
1716
- llm_config={"config_list": [{"model": "gpt-4", "api_key": openai_api_key}]}
1717
- )
1718
-
1719
- comparison_agent = AssistantAgent(
1720
- name="Comparator",
1721
- system_message="You are an expert at comparing documents and finding relationships.",
1722
- llm_config={"config_list": [{"model": "gpt-4", "api_key": openai_api_key}]}
1723
- )
1724
-
1725
- user_proxy = UserProxyAgent(
1726
- name="User",
1727
- system_message="A user seeking information from documents.",
1728
- human_input_mode="NEVER"
1729
- )
1730
-
1731
- # 創建群組聊天
1732
- groupchat = GroupChat(
1733
- agents=[user_proxy, qa_agent, summarizer_agent, comparison_agent],
1734
- messages=[],
1735
- max_round=5
1736
- )
1737
-
1738
- manager = GroupChatManager(groupchat=groupchat)
1739
-
1740
- # 準備任務提示
1741
- task_prompt = f"""Analyze the following documents and answer the query:
1742
-
1743
- Query: {query}
1744
-
1745
- Context:
1746
- {context}
1747
-
1748
- Please provide:
1749
- 1. Document analysis and relevant information
1750
- 2. Direct answer to the query
1751
- 3. Any important relationships or connections found
1752
- """
1753
-
1754
- # 執行群組討論
1755
- user_proxy.initiate_chat(
1756
- manager,
1757
- message=task_prompt
1758
- )
1759
-
1760
- # 獲取最終結果
1761
- final_answer = user_proxy.last_message()["content"]
1762
-
1763
- # 使用結果融合代理整理最終答案
1764
- fusion_prompt = f"""Based on the analysis provided, create a clear and concise response that:
1765
- 1. Directly answers the user's query: "{query}"
1766
- 2. Includes relevant supporting information
1767
- 3. Maintains a natural, conversational tone
1768
-
1769
- Analysis to summarize:
1770
- {final_answer}
1771
- """
1772
-
1773
- final_response = llm_gpt4.invoke(fusion_prompt)
1774
- return final_response.content
1775
-
1776
- except Exception as e:
1777
- print(f"ERROR in AutoGen processing: {str(e)}")
1778
- return f"Error during multi-agent processing: {str(e)}"
1779
 
1780
  def process_result(result: dict, query: str) -> str:
1781
  """處理查詢結果"""
 
1466
  return {"answer": result.output}
1467
 
1468
  def general_run(state):
1469
+ """改用直接 LLM 回答取代 General Agent"""
1470
+ try:
1471
+ prompt = f"""You are a helpful AI assistant. Please answer the following question:
1472
+ {state["query"]}
1473
+
1474
+ Provide a clear and informative answer."""
1475
+
1476
+ response = llm_gpt4.invoke(prompt)
1477
+ answer = response.content if hasattr(response, 'content') else str(response)
1478
+ return {"answer": answer}
1479
+ except Exception as e:
1480
+ print(f"ERROR in general_run: {str(e)}")
1481
+ return {"answer": "I apologize, but I'm having trouble processing your request."}
 
 
1482
 
1483
  def summariser_run(state):
1484
  result = summarizer_agent.execute_task(summariser_task, {"query": state["query"]})
 
1507
 
1508
  from tempfile import mkdtemp
1509
 
 
 
 
 
 
 
 
1510
  def get_file_path_tab6(file):
1511
  if isinstance(file, str):
1512
  print("DEBUG: File is a string:", file)
 
1564
  def langgraph_tab6_main(query: str, file=None):
1565
  try:
1566
  print(f"DEBUG: Starting processing with query: {query}")
1567
+
1568
+ # 如果沒有文件,直接使用 general_run
1569
+ if not file:
1570
+ return general_run({"query": query})["answer"]
1571
 
1572
  # 處理文件列表
1573
+ files = file if isinstance(file, list) else [file]
1574
+ all_docs = []
1575
+ file_names = []
1576
+ docs_by_file = []
1577
 
 
1578
  for f in files:
1579
  try:
1580
  path = get_file_path_tab6(f)
1581
  if not path:
 
1582
  continue
1583
+
 
1584
  file_names.append(os.path.basename(path))
1585
 
1586
+ # 根據文件類型選擇加載器
1587
  if path.lower().endswith('.pdf'):
1588
  loader = PyPDFLoader(path)
1589
  elif path.lower().endswith('.docx'):
1590
  loader = UnstructuredWordDocumentLoader(path)
1591
  else:
1592
  loader = TextLoader(path)
 
 
1593
 
1594
+ docs = loader.load()
1595
+ if docs:
1596
+ text = "\n".join(doc.page_content for doc in docs if hasattr(doc, 'page_content'))
 
 
 
 
 
 
 
1597
  docs_by_file.append(text)
1598
  all_docs.extend(docs)
 
 
 
 
 
1599
  except Exception as e:
1600
  print(f"ERROR processing file: {str(e)}")
1601
  continue
1602
 
1603
+ if not docs_by_file:
1604
+ return general_run({"query": query})["answer"]
1605
 
1606
+ # 建立檢索器
1607
  try:
1608
  chunks = RecursiveCharacterTextSplitter(
1609
  chunk_size=500,
 
1611
  ).split_documents(all_docs)
1612
 
1613
  db = FAISS.from_documents(chunks, embeddings)
1614
+ retriever = db.as_retriever(search_kwargs={"k": 5})
 
 
 
1615
 
1616
  global session_retriever, session_qa_chain
1617
  session_retriever = retriever
 
1629
 
1630
  # 解析查詢意圖
1631
  parsed = parse_query(query)
1632
+
1633
+ # 如果是複雜查詢(比較、關聯分析等),使用 execute_multi_agent
 
1634
  if needs_multi_agent_processing(query, parsed, docs_by_file):
1635
+ return execute_multi_agent(parsed, docs_by_file, file_names)
 
1636
 
1637
  # 使用 LangGraph 處理一般查詢
 
 
1638
  state = {
1639
  "query": query,
1640
  "file_names": file_names,
 
1642
  "retriever": retriever
1643
  }
1644
 
1645
+ graph = build_langgraph_pipeline()
1646
  result = graph.invoke(state)
1647
+
1648
+ # 處理結果
1649
+ if isinstance(result, dict):
1650
+ if "answer" in result:
1651
+ return result["answer"]
1652
+ elif "summary" in result:
1653
+ return result["summary"]
1654
+ elif session_qa_chain:
1655
+ try:
1656
+ return session_qa_chain.run(query)
1657
+ except Exception as e:
1658
+ print(f"ERROR in QA chain: {str(e)}")
1659
+
1660
+ return "I apologize, but I couldn't process your query properly."
1661
 
1662
  except Exception as e:
1663
  print(f"ERROR in main function: {str(e)}")
1664
+ return f"I apologize, but I encountered an error: {str(e)}"
1665
 
1666
  def needs_multi_agent_processing(query: str, parsed: dict, docs: list) -> bool:
1667
  return any([
 
1678
  ])
1679
  ])
1680
 
1681
+ def needs_multi_agent_processing(query: str, parsed: dict, docs: list) -> bool:
1682
+ """判斷是否需要多代理處理"""
1683
+ return any([
1684
+ parsed.get("summarize_files"),
1685
+ parsed.get("compare_files"),
1686
+ parsed.get("find_relations"),
1687
+ len(docs) > 1 and any(x in query.lower() for x in [
1688
+ "compare", "between", "both", "relation",
1689
+ "project", "similar", "different"
1690
+ ])
1691
+ ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1692
 
1693
  def process_result(result: dict, query: str) -> str:
1694
  """處理查詢結果"""