ChienChung committed on
Commit
6c70f02
·
verified ·
1 Parent(s): a4b0a13

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +172 -138
app.py CHANGED
@@ -1282,6 +1282,120 @@ def parse_query(query: str) -> dict:
1282
  "find_relations": False
1283
  }
1284
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1285
  def compare_documents(doc1: str, doc2: str, file1_name: str, file2_name: str) -> str:
1286
  try:
1287
  prompt = f"""Compare the following two documents and identify their main differences:
@@ -1323,73 +1437,10 @@ def find_document_relations(docs: list, file_names: list) -> str:
1323
  response = llm_gpt4.invoke(prompt)
1324
  return response.content
1325
 
1326
- def execute_multi_agent(parsed: dict, docs: list, file_names: list) -> str:
1327
- try:
1328
- results = {}
1329
-
1330
- # 處理直接的比較請求
1331
- if len(docs) == 2:
1332
- comparison = compare_documents(
1333
- docs[0], docs[1],
1334
- file_names[0], file_names[1]
1335
- )
1336
- results["comparisons"] = comparison
1337
-
1338
- # 處理其他分析
1339
- if parsed.get("summarize_files"):
1340
- summaries = []
1341
- for idx in parsed["summarize_files"]:
1342
- if idx < len(docs):
1343
- summary = document_summarize(docs[idx])
1344
- summaries.append(f"Document {file_names[idx]} summary:\n{summary}")
1345
- results["summaries"] = "\n\n".join(summaries)
1346
-
1347
- if parsed.get("find_relations"):
1348
- results["relations"] = find_document_relations(docs, file_names)
1349
-
1350
- # 融合結果
1351
- fusion_prompt = f"""Based on the following analysis results, provide a comprehensive answer:
1352
-
1353
- Comparison Results:
1354
- {results.get('comparisons', 'No comparison performed')}
1355
-
1356
- Summary Results:
1357
- {results.get('summaries', 'No summaries generated')}
1358
-
1359
- Relationship Analysis:
1360
- {results.get('relations', 'No relationship analysis performed')}
1361
-
1362
- Please provide a coherent response that covers all important findings.
1363
- """
1364
-
1365
- final_response = llm_gpt4.invoke(fusion_prompt)
1366
- return final_response.content if hasattr(final_response, 'content') else str(final_response)
1367
-
1368
- except Exception as e:
1369
- print(f"ERROR in execute_multi_agent: {str(e)}")
1370
- return f"Error occurred while processing the documents: {str(e)}"
1371
 
1372
  # === AutoGen 多代理人協作邏輯 ===
1373
- def autogen_multi_intent_agent(query: str, docs: list) -> str:
1374
- try:
1375
- context = "\n\n".join(d.page_content for d in docs[:10])
1376
- system_prompt = f"""You are a helpful assistant. Your task is to answer the following user question using two strategies:
1377
- 1. Use context-based question answering based on the document below.
1378
- 2. Also generate a short summary of the document, in case that helps interpret the question.
1379
 
1380
- Document Context:
1381
- {context}
1382
- """
1383
- user_proxy = UserProxyAgent(name="User", is_termination_msg=lambda x: True, human_input_mode="NEVER")
1384
- qa_agent = AssistantAgent(name="QA_Agent", system_message="You are great at document-based QA.")
1385
- sum_agent = AssistantAgent(name="Summary_Agent", system_message="You are great at summarising text.")
1386
- group_chat = GroupChat(agents=[user_proxy, qa_agent, sum_agent], messages=[], max_round=3)
1387
- manager = GroupChatManager(groupchat=group_chat, llm_config={"config_list": [{"model": "gpt-4o", "api_key": openai_api_key}]})
1388
-
1389
- user_proxy.initiate_chat(manager, message=query)
1390
- return user_proxy.last_message()["content"]
1391
- except Exception as e:
1392
- return f"[AutoGen Error] {e}"
1393
 
1394
  def detect_intent_embedding(query, file_names=[]):
1395
  query_emb = embedding_model.encode(query, normalize_embeddings=True)
@@ -1443,18 +1494,6 @@ search_task = Task(
1443
  )
1444
 
1445
  # === LangGraph 節點函數 ===
1446
- def docqa_run(state):
1447
- result = document_qa_agent.execute_task(docqa_task, {"query": state["query"]})
1448
- if isinstance(result, str):
1449
- output = result.lower()
1450
- else:
1451
- output = result.output.lower()
1452
- if any(x in output for x in ["no relevant info", "not found", "no answer"]):
1453
- return general_run(state)
1454
- if isinstance(result, str):
1455
- return {"answer": result}
1456
- else:
1457
- return {"answer": result.output}
1458
 
1459
  def general_run(state):
1460
  """改用直接 LLM 回答取代 General Agent"""
@@ -1471,12 +1510,49 @@ def general_run(state):
1471
  print(f"ERROR in general_run: {str(e)}")
1472
  return {"answer": "I apologize, but I'm having trouble processing your request."}
1473
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1474
  def summariser_run(state):
1475
- result = summarizer_agent.execute_task(summariser_task, {"query": state["query"]})
1476
- if isinstance(result, str):
1477
- return {"summary": result}
1478
- else:
1479
- return {"summary": result.output}
 
 
 
 
 
 
 
 
 
 
 
1480
 
1481
  # === LangGraph 定義 ===
1482
  def build_langgraph_pipeline():
@@ -1566,6 +1642,7 @@ def langgraph_tab6_main(query: str, file=None):
1566
  file_names = []
1567
  docs_by_file = []
1568
 
 
1569
  for f in files:
1570
  try:
1571
  path = get_file_path_tab6(f)
@@ -1618,14 +1695,11 @@ def langgraph_tab6_main(query: str, file=None):
1618
  print(f"ERROR setting up retriever: {str(e)}")
1619
  retriever = None
1620
 
1621
- # 解析查詢意圖
1622
- parsed = parse_query(query)
1623
-
1624
- # 如果是複雜查詢(比較、關聯分析等),使用 execute_multi_agent
1625
- if needs_multi_agent_processing(query, parsed, docs_by_file):
1626
- return execute_multi_agent(parsed, docs_by_file, file_names)
1627
-
1628
- # 使用 LangGraph 處理一般查詢
1629
  state = {
1630
  "query": query,
1631
  "file_names": file_names,
@@ -1633,22 +1707,14 @@ def langgraph_tab6_main(query: str, file=None):
1633
  "retriever": retriever
1634
  }
1635
 
1636
- graph = build_langgraph_pipeline()
1637
- result = graph.invoke(state)
1638
-
1639
- # 處理結果
1640
- if isinstance(result, dict):
1641
- if "answer" in result:
1642
- return result["answer"]
1643
- elif "summary" in result:
1644
- return result["summary"]
1645
- elif session_qa_chain:
1646
- try:
1647
- return session_qa_chain.run(query)
1648
- except Exception as e:
1649
- print(f"ERROR in QA chain: {str(e)}")
1650
-
1651
- return "I apologize, but I couldn't process your query properly."
1652
 
1653
  except Exception as e:
1654
  print(f"ERROR in main function: {str(e)}")
@@ -1656,39 +1722,7 @@ def langgraph_tab6_main(query: str, file=None):
1656
 
1657
 
1658
 
1659
- def needs_multi_agent_processing(query: str, parsed: dict, docs: list) -> bool:
1660
- """判斷是否需要多代理處理"""
1661
- # 檢查是否為比較或關聯分析查詢
1662
- comparison_keywords = ["compare", "difference", "differences", "between", "similar", "similarity"]
1663
- is_comparison_query = any(keyword in query.lower() for keyword in comparison_keywords)
1664
-
1665
- # 如果是比較查詢且有多個文件,直接返回 True
1666
- if is_comparison_query and len(docs) > 1:
1667
- return True
1668
-
1669
- return any([
1670
- parsed.get("summarize_files"),
1671
- parsed.get("compare_files"),
1672
- parsed.get("find_relations"),
1673
- len(docs) > 1 and any(x in query.lower() for x in [
1674
- "both", "relation", "project", "connection"
1675
- ])
1676
- ])
1677
-
1678
- def process_result(result: dict, query: str) -> str:
1679
- """處理查詢結果"""
1680
- if isinstance(result, dict):
1681
- if "answer" in result:
1682
- return result["answer"]
1683
- elif "summary" in result:
1684
- return result["summary"]
1685
- elif session_qa_chain:
1686
- try:
1687
- return session_qa_chain.run(query)
1688
- except Exception as e:
1689
- print(f"ERROR in QA chain: {str(e)}")
1690
-
1691
- return "I apologize, but I couldn't find a relevant answer in the documents."
1692
 
1693
  # Gradio Interface Settings
1694
  demo_description = """
 
1282
  "find_relations": False
1283
  }
1284
 
1285
def analyze_documents(query: str, docs: list, file_names: list, *,
                      max_chars_per_doc: int = 2000) -> str:
    """Answer a query about one or more documents with a single LLM call.

    Each document is cut to an excerpt, labelled with its file name, and
    placed in one prompt together with the query and general analysis
    instructions.

    Args:
        query: The user's question or instruction.
        docs: Document texts, one string per file.
        file_names: Display names paired with ``docs`` by position. Pairing
            uses ``zip``, so surplus entries on either side are ignored.
        max_chars_per_doc: Maximum number of characters of each document
            included in the prompt.

    Returns:
        The model's answer, or an ``"Error analyzing documents: ..."`` string
        if anything fails while building the prompt or calling the model.
    """
    try:
        # Build the labelled excerpts that form the document context.
        sections = []
        for name, doc in zip(file_names, docs):
            excerpt = doc[:max_chars_per_doc]
            # Only mark an excerpt as cut when text was actually dropped;
            # an unconditional "..." tells the model that content is missing
            # from documents that were included in full.
            if len(doc) > max_chars_per_doc:
                excerpt += "..."
            sections.append(f"Document {name}:\n{excerpt}")
        context = "\n\n".join(sections)

        # One generic prompt covers content lookup, comparison and
        # relationship queries alike.
        prompt = f"""Analyze the following documents and answer the query.

Query: {query}

Documents:
{context}

Instructions:
1. Understand the specific requirements of the query
2. Analyze the relevant parts of the documents
3. Consider all possible relationships and connections
4. Provide a direct and specific answer
5. Support your answer with evidence from the documents

Important:
- If the query asks about specific content, find and quote relevant parts
- If comparing documents, identify relevant similarities and differences
- If looking for relationships, analyze all possible connections
- Always provide evidence for your conclusions
- Be precise and direct in your answer

Format your response to directly address the query while providing supporting evidence.
"""

        response = llm_gpt4.invoke(prompt)
        # Fall back to str() when the response has no .content attribute.
        return response.content if hasattr(response, 'content') else str(response)

    except Exception as e:
        print(f"ERROR in analyze_documents: {str(e)}")
        return f"Error analyzing documents: {str(e)}"
1325
+
1326
def langgraph_tab6_main(query: str, file=None):
    """Answer a query, grounding it in uploaded files when any are given.

    With no usable file the query is answered by ``general_run``. Otherwise
    every file is loaded, a FAISS retriever and a conversational QA chain are
    built over all loaded pages (best effort), and the query is answered by
    ``analyze_documents`` over the per-file texts.

    Args:
        query: The user's question or instruction.
        file: A single uploaded file, a list of them, or a falsy value when
            nothing was uploaded. Each item is resolved to a path through
            ``get_file_path_tab6``.

    Returns:
        The answer text, or an apology string containing the error message
        if an unexpected exception occurs.

    Side effects:
        Rebinds the module-level ``session_retriever`` and
        ``session_qa_chain`` when the retriever is built successfully.
    """
    # Declared once at the top so the module-level rebinding is obvious.
    global session_retriever, session_qa_chain

    try:
        print(f"DEBUG: Starting processing with query: {query}")

        # No upload: answer directly without any document context.
        if not file:
            return general_run({"query": query})["answer"]

        files = file if isinstance(file, list) else [file]
        all_docs = []
        file_names = []
        docs_by_file = []

        # Load every uploaded file; a failure in one file skips only that file.
        for f in files:
            try:
                path = get_file_path_tab6(f)
                if not path:
                    continue

                # Pick the loader from the file extension.
                lowered = path.lower()
                if lowered.endswith('.pdf'):
                    loader = PyPDFLoader(path)
                elif lowered.endswith('.docx'):
                    loader = UnstructuredWordDocumentLoader(path)
                else:
                    loader = TextLoader(path)

                docs = loader.load()
                if not docs:
                    continue

                text = "\n".join(doc.page_content for doc in docs if hasattr(doc, 'page_content'))
                # Record the name only once the file has produced text, so
                # file_names stays index-aligned with docs_by_file. Appending
                # the name before loading left an orphan name behind whenever
                # a file failed to load, which attributed every later
                # document to the wrong file.
                file_names.append(os.path.basename(path))
                docs_by_file.append(text)
                all_docs.extend(docs)
            except Exception as e:
                print(f"ERROR processing file: {str(e)}")
                continue

        # Nothing could be loaded: behave as if no file had been uploaded.
        if not docs_by_file:
            return general_run({"query": query})["answer"]

        # Build the retriever and QA chain. This is best effort: the answer
        # below does not depend on it, so a failure is only logged.
        try:
            chunks = RecursiveCharacterTextSplitter(
                chunk_size=500,
                chunk_overlap=50
            ).split_documents(all_docs)

            db = FAISS.from_documents(chunks, embeddings)
            retriever = db.as_retriever(search_kwargs={"k": 5})

            session_retriever = retriever
            session_qa_chain = ConversationalRetrievalChain.from_llm(
                llm=llm_gpt4,
                retriever=retriever,
                memory=ConversationBufferMemory(
                    memory_key="chat_history",
                    return_messages=True
                ),
            )
        except Exception as e:
            print(f"ERROR setting up retriever: {str(e)}")

        # Answer with the generic analysis function over the loaded texts.
        return analyze_documents(query, docs_by_file, file_names)

    except Exception as e:
        print(f"ERROR in main function: {str(e)}")
        return f"I apologize, but I encountered an error: {str(e)}"
1399
  def compare_documents(doc1: str, doc2: str, file1_name: str, file2_name: str) -> str:
1400
  try:
1401
  prompt = f"""Compare the following two documents and identify their main differences:
 
1437
  response = llm_gpt4.invoke(prompt)
1438
  return response.content
1439
 
1440
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1441
 
1442
  # === AutoGen 多代理人協作邏輯 ===
 
 
 
 
 
 
1443
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1444
 
1445
  def detect_intent_embedding(query, file_names=[]):
1446
  query_emb = embedding_model.encode(query, normalize_embeddings=True)
 
1494
  )
1495
 
1496
  # === LangGraph 節點函數 ===
 
 
 
 
 
 
 
 
 
 
 
 
1497
 
1498
  def general_run(state):
1499
  """改用直接 LLM 回答取代 General Agent"""
 
1510
  print(f"ERROR in general_run: {str(e)}")
1511
  return {"answer": "I apologize, but I'm having trouble processing your request."}
1512
 
1513
+
1514
def docqa_run(state):
    """Answer the query in ``state`` from document context.

    Context comes from the retriever when one is available, otherwise from
    the raw texts under ``state["docs"]``. Only the first 3000 characters of
    the context are sent to the model.

    Args:
        state: Mapping with a ``"query"`` entry, an optional ``"retriever"``
            entry (may be ``None``) and a ``"docs"`` entry.
            NOTE(review): ``"docs"`` is joined with ``"\\n"``, so it is
            assumed to hold strings; confirm against the caller.

    Returns:
        ``{"answer": <text>}``. On any exception the result of
        ``general_run(state)`` is returned instead.
    """
    try:
        query = state["query"]

        # The caller stores retriever=None when index setup fails, so test
        # the value rather than the key. A bare `"retriever" in state` check
        # sent None down the retrieval path, raised AttributeError, and
        # discarded the loaded documents in favour of the general fallback.
        retriever = state.get("retriever")
        if retriever is not None:
            relevant_docs = retriever.get_relevant_documents(query)
            context = "\n".join(d.page_content for d in relevant_docs)
        else:
            context = "\n".join(state["docs"])

        prompt = f"""Based on the following context, please answer the question:
Question: {query}

Context:
{context[:3000]}

Provide a detailed and accurate answer based on the context."""

        response = llm_gpt4.invoke(prompt)
        # Fall back to str() when the response has no .content attribute.
        return {"answer": response.content if hasattr(response, 'content') else str(response)}
    except Exception as e:
        print(f"ERROR in docqa_run: {str(e)}")
        return general_run(state)
1537
+
1538
+
1539
  def summariser_run(state):
1540
+ """文件摘要處理"""
1541
+ try:
1542
+ context = "\n".join(state["docs"])
1543
+ prompt = f"""Please provide a comprehensive summary of the following document:
1544
+ {context[:3000]}
1545
+
1546
+ Focus on:
1547
+ 1. Main topics and key points
1548
+ 2. Important findings or conclusions
1549
+ 3. Significant details"""
1550
+
1551
+ response = llm_gpt4.invoke(prompt)
1552
+ return {"summary": response.content if hasattr(response, 'content') else str(response)}
1553
+ except Exception as e:
1554
+ print(f"ERROR in summariser_run: {str(e)}")
1555
+ return {"summary": "Error generating summary."}
1556
 
1557
  # === LangGraph 定義 ===
1558
  def build_langgraph_pipeline():
 
1642
  file_names = []
1643
  docs_by_file = []
1644
 
1645
+ # 處理上傳的文件
1646
  for f in files:
1647
  try:
1648
  path = get_file_path_tab6(f)
 
1695
  print(f"ERROR setting up retriever: {str(e)}")
1696
  retriever = None
1697
 
1698
+ # 檢測是否為多文件查詢
1699
+ if len(docs_by_file) > 1:
1700
+ return analyze_documents(query, docs_by_file, file_names)
1701
+
1702
+ # 使用 LangGraph 處理單文件查詢
 
 
 
1703
  state = {
1704
  "query": query,
1705
  "file_names": file_names,
 
1707
  "retriever": retriever
1708
  }
1709
 
1710
+ # 根據查詢意圖選擇處理方式
1711
+ intent = detect_intent_embedding(query, file_names)
1712
+ if intent == "Summarise":
1713
+ return summariser_run(state)["summary"]
1714
+ elif intent == "DocQA":
1715
+ return docqa_run(state)["answer"]
1716
+ else:
1717
+ return general_run(state)["answer"]
 
 
 
 
 
 
 
 
1718
 
1719
  except Exception as e:
1720
  print(f"ERROR in main function: {str(e)}")
 
1722
 
1723
 
1724
 
1725
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1726
 
1727
  # Gradio Interface Settings
1728
  demo_description = """