Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1466,21 +1466,19 @@ def docqa_run(state):
|
|
| 1466 |
return {"answer": result.output}
|
| 1467 |
|
| 1468 |
def general_run(state):
|
| 1469 |
-
|
| 1470 |
-
|
| 1471 |
-
|
| 1472 |
-
|
| 1473 |
-
|
| 1474 |
-
|
| 1475 |
-
|
| 1476 |
-
|
| 1477 |
-
|
| 1478 |
-
|
| 1479 |
-
|
| 1480 |
-
|
| 1481 |
-
return {"answer":
|
| 1482 |
-
else:
|
| 1483 |
-
return {"answer": result.output}
|
| 1484 |
|
| 1485 |
def summariser_run(state):
|
| 1486 |
result = summarizer_agent.execute_task(summariser_task, {"query": state["query"]})
|
|
@@ -1509,13 +1507,6 @@ def build_langgraph_pipeline():
|
|
| 1509 |
|
| 1510 |
from tempfile import mkdtemp
|
| 1511 |
|
| 1512 |
-
|
| 1513 |
-
from tempfile import mkdtemp
|
| 1514 |
-
import os
|
| 1515 |
-
|
| 1516 |
-
from tempfile import mkdtemp
|
| 1517 |
-
import os
|
| 1518 |
-
|
| 1519 |
def get_file_path_tab6(file):
|
| 1520 |
if isinstance(file, str):
|
| 1521 |
print("DEBUG: File is a string:", file)
|
|
@@ -1573,60 +1564,46 @@ def get_file_path_tab6(file):
|
|
| 1573 |
def langgraph_tab6_main(query: str, file=None):
|
| 1574 |
try:
|
| 1575 |
print(f"DEBUG: Starting processing with query: {query}")
|
| 1576 |
-
|
|
|
|
|
|
|
|
|
|
| 1577 |
|
| 1578 |
# 處理文件列表
|
| 1579 |
-
files = file if isinstance(file, list) else [file]
|
| 1580 |
-
all_docs = []
|
| 1581 |
-
file_names = []
|
| 1582 |
-
docs_by_file = []
|
| 1583 |
|
| 1584 |
-
# 處理上傳的文件
|
| 1585 |
for f in files:
|
| 1586 |
try:
|
| 1587 |
path = get_file_path_tab6(f)
|
| 1588 |
if not path:
|
| 1589 |
-
print(f"WARNING: Could not process file {f}, skipping")
|
| 1590 |
continue
|
| 1591 |
-
|
| 1592 |
-
print(f"DEBUG: Successfully got file path: {path}")
|
| 1593 |
file_names.append(os.path.basename(path))
|
| 1594 |
|
| 1595 |
-
# 根據文件類型選擇
|
| 1596 |
if path.lower().endswith('.pdf'):
|
| 1597 |
loader = PyPDFLoader(path)
|
| 1598 |
elif path.lower().endswith('.docx'):
|
| 1599 |
loader = UnstructuredWordDocumentLoader(path)
|
| 1600 |
else:
|
| 1601 |
loader = TextLoader(path)
|
| 1602 |
-
|
| 1603 |
-
print(f"DEBUG: Using loader: {type(loader)}")
|
| 1604 |
|
| 1605 |
-
|
| 1606 |
-
|
| 1607 |
-
|
| 1608 |
-
|
| 1609 |
-
# 提取文件內容
|
| 1610 |
-
if docs and hasattr(docs[0], "page_content"):
|
| 1611 |
-
text = "\n".join([doc.page_content for doc in docs])
|
| 1612 |
-
else:
|
| 1613 |
-
text = "\n".join(docs)
|
| 1614 |
-
|
| 1615 |
docs_by_file.append(text)
|
| 1616 |
all_docs.extend(docs)
|
| 1617 |
-
|
| 1618 |
-
except Exception as e:
|
| 1619 |
-
print(f"ERROR loading document: {str(e)}")
|
| 1620 |
-
continue
|
| 1621 |
-
|
| 1622 |
except Exception as e:
|
| 1623 |
print(f"ERROR processing file: {str(e)}")
|
| 1624 |
continue
|
| 1625 |
|
| 1626 |
-
if not
|
| 1627 |
-
return "
|
| 1628 |
|
| 1629 |
-
# 建立檢索器
|
| 1630 |
try:
|
| 1631 |
chunks = RecursiveCharacterTextSplitter(
|
| 1632 |
chunk_size=500,
|
|
@@ -1634,10 +1611,7 @@ def langgraph_tab6_main(query: str, file=None):
|
|
| 1634 |
).split_documents(all_docs)
|
| 1635 |
|
| 1636 |
db = FAISS.from_documents(chunks, embeddings)
|
| 1637 |
-
retriever = db.as_retriever(
|
| 1638 |
-
search_type="similarity",
|
| 1639 |
-
search_kwargs={"k": 5}
|
| 1640 |
-
)
|
| 1641 |
|
| 1642 |
global session_retriever, session_qa_chain
|
| 1643 |
session_retriever = retriever
|
|
@@ -1655,16 +1629,12 @@ def langgraph_tab6_main(query: str, file=None):
|
|
| 1655 |
|
| 1656 |
# 解析查詢意圖
|
| 1657 |
parsed = parse_query(query)
|
| 1658 |
-
|
| 1659 |
-
|
| 1660 |
-
# 檢測是否需要使用 AutoGen 多代理協作
|
| 1661 |
if needs_multi_agent_processing(query, parsed, docs_by_file):
|
| 1662 |
-
|
| 1663 |
-
return handle_complex_query_with_autogen(query, docs_by_file, file_names)
|
| 1664 |
|
| 1665 |
# 使用 LangGraph 處理一般查詢
|
| 1666 |
-
print("DEBUG: Using LangGraph for standard query")
|
| 1667 |
-
graph = build_langgraph_pipeline()
|
| 1668 |
state = {
|
| 1669 |
"query": query,
|
| 1670 |
"file_names": file_names,
|
|
@@ -1672,12 +1642,26 @@ def langgraph_tab6_main(query: str, file=None):
|
|
| 1672 |
"retriever": retriever
|
| 1673 |
}
|
| 1674 |
|
|
|
|
| 1675 |
result = graph.invoke(state)
|
| 1676 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1677 |
|
| 1678 |
except Exception as e:
|
| 1679 |
print(f"ERROR in main function: {str(e)}")
|
| 1680 |
-
return f"
|
| 1681 |
|
| 1682 |
def needs_multi_agent_processing(query: str, parsed: dict, docs: list) -> bool:
|
| 1683 |
return any([
|
|
@@ -1694,88 +1678,17 @@ def needs_multi_agent_processing(query: str, parsed: dict, docs: list) -> bool:
|
|
| 1694 |
])
|
| 1695 |
])
|
| 1696 |
|
| 1697 |
-
def
|
| 1698 |
-
"""
|
| 1699 |
-
|
| 1700 |
-
|
| 1701 |
-
|
| 1702 |
-
|
| 1703 |
-
|
| 1704 |
-
|
| 1705 |
-
|
| 1706 |
-
|
| 1707 |
-
|
| 1708 |
-
name="QA_Expert",
|
| 1709 |
-
system_message="You are an expert at analyzing documents and answering questions.",
|
| 1710 |
-
llm_config={"config_list": [{"model": "gpt-4", "api_key": openai_api_key}]}
|
| 1711 |
-
)
|
| 1712 |
-
|
| 1713 |
-
summarizer_agent = AssistantAgent(
|
| 1714 |
-
name="Summarizer",
|
| 1715 |
-
system_message="You are an expert at summarizing documents and identifying key points.",
|
| 1716 |
-
llm_config={"config_list": [{"model": "gpt-4", "api_key": openai_api_key}]}
|
| 1717 |
-
)
|
| 1718 |
-
|
| 1719 |
-
comparison_agent = AssistantAgent(
|
| 1720 |
-
name="Comparator",
|
| 1721 |
-
system_message="You are an expert at comparing documents and finding relationships.",
|
| 1722 |
-
llm_config={"config_list": [{"model": "gpt-4", "api_key": openai_api_key}]}
|
| 1723 |
-
)
|
| 1724 |
-
|
| 1725 |
-
user_proxy = UserProxyAgent(
|
| 1726 |
-
name="User",
|
| 1727 |
-
system_message="A user seeking information from documents.",
|
| 1728 |
-
human_input_mode="NEVER"
|
| 1729 |
-
)
|
| 1730 |
-
|
| 1731 |
-
# 創建群組聊天
|
| 1732 |
-
groupchat = GroupChat(
|
| 1733 |
-
agents=[user_proxy, qa_agent, summarizer_agent, comparison_agent],
|
| 1734 |
-
messages=[],
|
| 1735 |
-
max_round=5
|
| 1736 |
-
)
|
| 1737 |
-
|
| 1738 |
-
manager = GroupChatManager(groupchat=groupchat)
|
| 1739 |
-
|
| 1740 |
-
# 準備任務提示
|
| 1741 |
-
task_prompt = f"""Analyze the following documents and answer the query:
|
| 1742 |
-
|
| 1743 |
-
Query: {query}
|
| 1744 |
-
|
| 1745 |
-
Context:
|
| 1746 |
-
{context}
|
| 1747 |
-
|
| 1748 |
-
Please provide:
|
| 1749 |
-
1. Document analysis and relevant information
|
| 1750 |
-
2. Direct answer to the query
|
| 1751 |
-
3. Any important relationships or connections found
|
| 1752 |
-
"""
|
| 1753 |
-
|
| 1754 |
-
# 執行群組討論
|
| 1755 |
-
user_proxy.initiate_chat(
|
| 1756 |
-
manager,
|
| 1757 |
-
message=task_prompt
|
| 1758 |
-
)
|
| 1759 |
-
|
| 1760 |
-
# 獲取最終結果
|
| 1761 |
-
final_answer = user_proxy.last_message()["content"]
|
| 1762 |
-
|
| 1763 |
-
# 使用結果融合代理整理最終答案
|
| 1764 |
-
fusion_prompt = f"""Based on the analysis provided, create a clear and concise response that:
|
| 1765 |
-
1. Directly answers the user's query: "{query}"
|
| 1766 |
-
2. Includes relevant supporting information
|
| 1767 |
-
3. Maintains a natural, conversational tone
|
| 1768 |
-
|
| 1769 |
-
Analysis to summarize:
|
| 1770 |
-
{final_answer}
|
| 1771 |
-
"""
|
| 1772 |
-
|
| 1773 |
-
final_response = llm_gpt4.invoke(fusion_prompt)
|
| 1774 |
-
return final_response.content
|
| 1775 |
-
|
| 1776 |
-
except Exception as e:
|
| 1777 |
-
print(f"ERROR in AutoGen processing: {str(e)}")
|
| 1778 |
-
return f"Error during multi-agent processing: {str(e)}"
|
| 1779 |
|
| 1780 |
def process_result(result: dict, query: str) -> str:
|
| 1781 |
"""處理查詢結果"""
|
|
|
|
| 1466 |
return {"answer": result.output}
|
| 1467 |
|
| 1468 |
def general_run(state):
|
| 1469 |
+
"""改用直接 LLM 回答取代 General Agent"""
|
| 1470 |
+
try:
|
| 1471 |
+
prompt = f"""You are a helpful AI assistant. Please answer the following question:
|
| 1472 |
+
{state["query"]}
|
| 1473 |
+
|
| 1474 |
+
Provide a clear and informative answer."""
|
| 1475 |
+
|
| 1476 |
+
response = llm_gpt4.invoke(prompt)
|
| 1477 |
+
answer = response.content if hasattr(response, 'content') else str(response)
|
| 1478 |
+
return {"answer": answer}
|
| 1479 |
+
except Exception as e:
|
| 1480 |
+
print(f"ERROR in general_run: {str(e)}")
|
| 1481 |
+
return {"answer": "I apologize, but I'm having trouble processing your request."}
|
|
|
|
|
|
|
| 1482 |
|
| 1483 |
def summariser_run(state):
|
| 1484 |
result = summarizer_agent.execute_task(summariser_task, {"query": state["query"]})
|
|
|
|
| 1507 |
|
| 1508 |
from tempfile import mkdtemp
|
| 1509 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1510 |
def get_file_path_tab6(file):
|
| 1511 |
if isinstance(file, str):
|
| 1512 |
print("DEBUG: File is a string:", file)
|
|
|
|
| 1564 |
def langgraph_tab6_main(query: str, file=None):
|
| 1565 |
try:
|
| 1566 |
print(f"DEBUG: Starting processing with query: {query}")
|
| 1567 |
+
|
| 1568 |
+
# 如果沒有文件,直接使用 general_run
|
| 1569 |
+
if not file:
|
| 1570 |
+
return general_run({"query": query})["answer"]
|
| 1571 |
|
| 1572 |
# 處理文件列表
|
| 1573 |
+
files = file if isinstance(file, list) else [file]
|
| 1574 |
+
all_docs = []
|
| 1575 |
+
file_names = []
|
| 1576 |
+
docs_by_file = []
|
| 1577 |
|
|
|
|
| 1578 |
for f in files:
|
| 1579 |
try:
|
| 1580 |
path = get_file_path_tab6(f)
|
| 1581 |
if not path:
|
|
|
|
| 1582 |
continue
|
| 1583 |
+
|
|
|
|
| 1584 |
file_names.append(os.path.basename(path))
|
| 1585 |
|
| 1586 |
+
# 根據文件類型選擇加載器
|
| 1587 |
if path.lower().endswith('.pdf'):
|
| 1588 |
loader = PyPDFLoader(path)
|
| 1589 |
elif path.lower().endswith('.docx'):
|
| 1590 |
loader = UnstructuredWordDocumentLoader(path)
|
| 1591 |
else:
|
| 1592 |
loader = TextLoader(path)
|
|
|
|
|
|
|
| 1593 |
|
| 1594 |
+
docs = loader.load()
|
| 1595 |
+
if docs:
|
| 1596 |
+
text = "\n".join(doc.page_content for doc in docs if hasattr(doc, 'page_content'))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1597 |
docs_by_file.append(text)
|
| 1598 |
all_docs.extend(docs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1599 |
except Exception as e:
|
| 1600 |
print(f"ERROR processing file: {str(e)}")
|
| 1601 |
continue
|
| 1602 |
|
| 1603 |
+
if not docs_by_file:
|
| 1604 |
+
return general_run({"query": query})["answer"]
|
| 1605 |
|
| 1606 |
+
# 建立檢索器
|
| 1607 |
try:
|
| 1608 |
chunks = RecursiveCharacterTextSplitter(
|
| 1609 |
chunk_size=500,
|
|
|
|
| 1611 |
).split_documents(all_docs)
|
| 1612 |
|
| 1613 |
db = FAISS.from_documents(chunks, embeddings)
|
| 1614 |
+
retriever = db.as_retriever(search_kwargs={"k": 5})
|
|
|
|
|
|
|
|
|
|
| 1615 |
|
| 1616 |
global session_retriever, session_qa_chain
|
| 1617 |
session_retriever = retriever
|
|
|
|
| 1629 |
|
| 1630 |
# 解析查詢意圖
|
| 1631 |
parsed = parse_query(query)
|
| 1632 |
+
|
| 1633 |
+
# 如果是複雜查詢(比較、關聯分析等),使用 execute_multi_agent
|
|
|
|
| 1634 |
if needs_multi_agent_processing(query, parsed, docs_by_file):
|
| 1635 |
+
return execute_multi_agent(parsed, docs_by_file, file_names)
|
|
|
|
| 1636 |
|
| 1637 |
# 使用 LangGraph 處理一般查詢
|
|
|
|
|
|
|
| 1638 |
state = {
|
| 1639 |
"query": query,
|
| 1640 |
"file_names": file_names,
|
|
|
|
| 1642 |
"retriever": retriever
|
| 1643 |
}
|
| 1644 |
|
| 1645 |
+
graph = build_langgraph_pipeline()
|
| 1646 |
result = graph.invoke(state)
|
| 1647 |
+
|
| 1648 |
+
# 處理結果
|
| 1649 |
+
if isinstance(result, dict):
|
| 1650 |
+
if "answer" in result:
|
| 1651 |
+
return result["answer"]
|
| 1652 |
+
elif "summary" in result:
|
| 1653 |
+
return result["summary"]
|
| 1654 |
+
elif session_qa_chain:
|
| 1655 |
+
try:
|
| 1656 |
+
return session_qa_chain.run(query)
|
| 1657 |
+
except Exception as e:
|
| 1658 |
+
print(f"ERROR in QA chain: {str(e)}")
|
| 1659 |
+
|
| 1660 |
+
return "I apologize, but I couldn't process your query properly."
|
| 1661 |
|
| 1662 |
except Exception as e:
|
| 1663 |
print(f"ERROR in main function: {str(e)}")
|
| 1664 |
+
return f"I apologize, but I encountered an error: {str(e)}"
|
| 1665 |
|
| 1666 |
def needs_multi_agent_processing(query: str, parsed: dict, docs: list) -> bool:
|
| 1667 |
return any([
|
|
|
|
| 1678 |
])
|
| 1679 |
])
|
| 1680 |
|
| 1681 |
+
def needs_multi_agent_processing(query: str, parsed: dict, docs: list) -> bool:
|
| 1682 |
+
"""判斷是否需要多代理處理"""
|
| 1683 |
+
return any([
|
| 1684 |
+
parsed.get("summarize_files"),
|
| 1685 |
+
parsed.get("compare_files"),
|
| 1686 |
+
parsed.get("find_relations"),
|
| 1687 |
+
len(docs) > 1 and any(x in query.lower() for x in [
|
| 1688 |
+
"compare", "between", "both", "relation",
|
| 1689 |
+
"project", "similar", "different"
|
| 1690 |
+
])
|
| 1691 |
+
])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1692 |
|
| 1693 |
def process_result(result: dict, query: str) -> str:
|
| 1694 |
"""處理查詢結果"""
|