ChienChung committed on
Commit
2ff6e79
·
verified ·
1 Parent(s): 4fd8be5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -92
app.py CHANGED
@@ -1452,128 +1452,137 @@ def build_langgraph_pipeline():
1452
  from tempfile import mkdtemp
1453
 
1454
 
 
 
 
1455
  def get_file_path_tab6(file):
1456
- """改進的文件路徑處理函數,專門處理 Gradio 上傳"""
1457
- try:
1458
- # 如果是 None
1459
- if file is None:
 
 
 
 
 
1460
  return None
1461
-
1462
- # 處理 Gradio 文件對象
1463
- if hasattr(file, 'name'):
1464
- return file.name
1465
-
1466
- # 如果是字典(Gradio 文件上傳的另一種格式)
1467
- if isinstance(file, dict):
1468
- if 'name' in file:
1469
- return file['name']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1470
  return None
1471
-
1472
- # 如果是字符串路徑
1473
- if isinstance(file, str):
1474
- # 檢查常見的上傳路徑
1475
- possible_paths = [
1476
- file,
1477
- os.path.join('/tmp/gradio/', file),
1478
- os.path.join(os.getcwd(), file),
1479
- os.path.abspath(file)
1480
- ]
1481
-
1482
- for path in possible_paths:
1483
- if os.path.exists(path):
1484
- return path
1485
-
1486
- # 如果找不到文件,返回原始路徑
1487
- return file
1488
-
1489
- return None
1490
- except Exception as e:
1491
- print(f"Error in get_file_path_tab6: {e}")
1492
  return None
1493
 
1494
  def langgraph_tab6_main(query: str, file=None):
1495
  try:
1496
- # 初始化文件處理
1497
  files = file if isinstance(file, list) else [file] if file else []
1498
- all_docs = []
1499
- file_names = []
1500
- docs_by_file = []
1501
 
1502
- # 處理每個文件
1503
  for f in files:
1504
- try:
1505
- # 獲取文件路徑
1506
- path = get_file_path_tab6(f)
1507
- if not path:
1508
- print(f"Could not get valid path for file: {f}")
1509
- continue
1510
-
1511
- print(f"Attempting to process file: {path}")
1512
-
1513
- # 根據文件類型選擇加載器
1514
- if path.lower().endswith(".pdf"):
1515
- from langchain.document_loaders import PyPDFLoader
1516
- loader = PyPDFLoader(path)
1517
- elif path.lower().endswith(".docx"):
1518
- from langchain.document_loaders import UnstructuredWordDocumentLoader
1519
- loader = UnstructuredWordDocumentLoader(path)
1520
- else:
1521
- from langchain.document_loaders import TextLoader
1522
- loader = TextLoader(path)
1523
-
1524
- # 加載文件
1525
- docs = loader.load()
1526
- if docs:
1527
- file_names.append(os.path.basename(path))
1528
- if hasattr(docs[0], "page_content"):
1529
- text = "\n".join([doc.page_content for doc in docs])
1530
- else:
1531
- text = "\n".join(docs)
1532
- docs_by_file.append(text)
1533
- all_docs.extend(docs)
1534
- print(f"Successfully processed file: {path}")
1535
- except Exception as e:
1536
- print(f"Error processing file {f}: {e}")
1537
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1538
 
1539
- # 檢查是否有成功處的文件
1540
  if not all_docs:
1541
- return "No valid documents could be processed. Please check your file and try again."
1542
-
1543
- # 其餘代碼保持不變...
1544
- chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(all_docs)
1545
- db = FAISS.from_documents(chunks, embeddings)
1546
- retriever = db.as_retriever()
1547
-
1548
- global session_retriever
1549
- session_retriever = retriever
1550
- global session_qa_chain
1551
- session_qa_chain = ConversationalRetrievalChain.from_llm(
1552
- llm=llm_gpt4,
1553
- retriever=retriever,
1554
- memory=ConversationBufferMemory(memory_key="chat_history", return_messages=True),
1555
- )
1556
 
 
1557
  parsed = parse_query(query)
1558
  if (parsed.get("summarise") or parsed.get("compare")) and len(docs_by_file) > 0:
1559
  final_answer = execute_multi_agent(parsed, docs_by_file, file_names)
1560
  return final_answer
1561
 
 
1562
  graph = build_langgraph_pipeline()
1563
  state = {"query": query, "file_names": file_names}
1564
  if retriever is not None:
1565
  state["retriever"] = retriever
1566
-
1567
  result = graph.invoke(state)
1568
  if "answer" in result:
1569
  return result["answer"]
1570
  if "summary" in result:
1571
  return result["summary"]
1572
  return "No answer."
1573
-
1574
  except Exception as e:
1575
- print(f"Error in main function: {e}")
1576
- return f"[Tab6 Error] {str(e)}"
1577
 
1578
  # Gradio Interface Settings
1579
  demo_description = """
 
1452
  from tempfile import mkdtemp
1453
 
1454
 
1455
+ from tempfile import mkdtemp
1456
+ import os
1457
+
1458
  def get_file_path_tab6(file):
1459
+ # DEBUG: 印出接收到 file 物
1460
+ print("DEBUG: Received file object:", file)
1461
+
1462
+ # 如果傳入的是字串,確認該字串為存在的檔案路徑
1463
+ if isinstance(file, str):
1464
+ if os.path.exists(file):
1465
+ return file
1466
+ else:
1467
+ print("DEBUG: String file path does not exist:", file)
1468
  return None
1469
+ # 如果傳入的是字典(Gradio 上傳後常見格式)
1470
+ elif isinstance(file, dict):
1471
+ data = file.get("data")
1472
+ name = file.get("name")
1473
+ print("DEBUG: File dict - name:", name, "data type:", type(data))
1474
+ if data:
1475
+ # 如果 data 為字串且該路徑存在,就直接返回
1476
+ if isinstance(data, str) and os.path.exists(data):
1477
+ return data
1478
+ else:
1479
+ # 將 data 寫入臨時檔案
1480
+ temp_dir = mkdtemp()
1481
+ file_path = os.path.join(temp_dir, name if name else "uploaded_file")
1482
+ with open(file_path, "wb") as f:
1483
+ if isinstance(data, str):
1484
+ f.write(data.encode("utf-8"))
1485
+ else:
1486
+ f.write(data)
1487
+ # 檢查檔案是否成功寫入
1488
+ if os.path.exists(file_path):
1489
+ print("DEBUG: File successfully written to:", file_path)
1490
+ return file_path
1491
+ else:
1492
+ print("DEBUG: File not created at:", file_path)
1493
+ return None
1494
+ else:
1495
+ # 如果沒有 data,就返回 None 避免返回無效檔案名稱
1496
+ print("DEBUG: No data field in file dict")
1497
  return None
1498
+ # 如果是具有 .save 屬性的物件,直接呼叫 save 並返回檔案路徑
1499
+ elif hasattr(file, "save"):
1500
+ temp_dir = mkdtemp()
1501
+ file_path = os.path.join(temp_dir, file.name)
1502
+ file.save(file_path)
1503
+ if os.path.exists(file_path):
1504
+ print("DEBUG: File saved at:", file_path)
1505
+ return file_path
1506
+ else:
1507
+ print("DEBUG: File not saved at:", file_path)
1508
+ return None
1509
+ else:
1510
+ # 如果 file 有 .name 屬性,嘗試返回該屬性
1511
+ if hasattr(file, "name"):
1512
+ if os.path.exists(file.name):
1513
+ return file.name
1514
+ print("DEBUG: File type not recognized.")
 
 
 
 
1515
  return None
1516
 
1517
  def langgraph_tab6_main(query: str, file=None):
1518
  try:
1519
+ # 取得上傳檔案列表(多檔案模式)
1520
  files = file if isinstance(file, list) else [file] if file else []
1521
+ all_docs = [] # 用於建立檢索器
1522
+ file_names = [] # 檔案名稱列表
1523
+ docs_by_file = [] # 每份文件的完整內文
1524
 
 
1525
  for f in files:
1526
+ path = get_file_path_tab6(f) # 使用新版 get_file_path_tab6
1527
+ if not path:
1528
+ print("DEBUG: get_file_path_tab6 returned None for file:", f)
1529
+ continue
1530
+ if not os.path.exists(path):
1531
+ print("DEBUG: Returned path does not exist:", path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1532
  continue
1533
+ file_names.append(os.path.basename(path))
1534
+ print("DEBUG: Processing file:", path)
1535
+ if path.lower().endswith(".pdf"):
1536
+ loader = PyPDFLoader(path)
1537
+ elif path.lower().endswith(".docx"):
1538
+ loader = UnstructuredWordDocumentLoader(path)
1539
+ else:
1540
+ loader = TextLoader(path)
1541
+ docs = loader.load()
1542
+ print("DEBUG: Docs loaded from", path, ":", docs)
1543
+ if docs and hasattr(docs[0], "page_content"):
1544
+ text = "\n".join([doc.page_content for doc in docs])
1545
+ else:
1546
+ text = "\n".join(docs)
1547
+ docs_by_file.append(text)
1548
+ all_docs.extend(docs)
1549
 
1550
+ # 建立索器(用於非多代流程)
1551
  if not all_docs:
1552
+ print("DEBUG: No valid document content read. file_names:", file_names)
1553
+ retriever = None
1554
+ else:
1555
+ chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(all_docs)
1556
+ db = FAISS.from_documents(chunks, embeddings)
1557
+ retriever = db.as_retriever()
1558
+ global session_retriever
1559
+ session_retriever = retriever
1560
+ global session_qa_chain
1561
+ session_qa_chain = ConversationalRetrievalChain.from_llm(
1562
+ llm=llm_gpt4,
1563
+ retriever=retriever,
1564
+ memory=ConversationBufferMemory(memory_key="chat_history", return_messages=True),
1565
+ )
 
1566
 
1567
+ # 解析查詢拆解子意圖
1568
  parsed = parse_query(query)
1569
  if (parsed.get("summarise") or parsed.get("compare")) and len(docs_by_file) > 0:
1570
  final_answer = execute_multi_agent(parsed, docs_by_file, file_names)
1571
  return final_answer
1572
 
1573
+ # 否則,走原有 LangGraph pipeline
1574
  graph = build_langgraph_pipeline()
1575
  state = {"query": query, "file_names": file_names}
1576
  if retriever is not None:
1577
  state["retriever"] = retriever
 
1578
  result = graph.invoke(state)
1579
  if "answer" in result:
1580
  return result["answer"]
1581
  if "summary" in result:
1582
  return result["summary"]
1583
  return "No answer."
 
1584
  except Exception as e:
1585
+ return f"[Tab6 Error] {e}"
 
1586
 
1587
  # Gradio Interface Settings
1588
  demo_description = """