ChienChung committed on
Commit
00fbb89
·
verified ·
1 Parent(s): b56ff29

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -37
app.py CHANGED
@@ -1390,49 +1390,45 @@ def get_file_path_tab6(file):
1390
  # 修改後的 Tab6 主執行函式
1391
  def langgraph_tab6_main(query: str, file=None):
1392
  try:
1393
- # 僅支援單檔案上傳:如果 file 為列表,取第一個檔案
1394
- if isinstance(file, list):
1395
- file = file[0] if file else None
1396
-
1397
- file_names = []
1398
- retriever = None # 預設沒有文件內容
1399
- if file is not None:
1400
- # 取得檔案路徑(使用原有的 get_file_path 函式,不做修改)
1401
- path = get_file_path(file)
1402
- if path:
1403
- file_names.append(os.path.basename(path))
1404
- # 根據檔案副檔名選擇 Loader
1405
- if path.lower().endswith(".pdf"):
1406
- loader = PyPDFLoader(path)
1407
- elif path.lower().endswith(".docx"):
1408
- loader = UnstructuredWordDocumentLoader(path)
1409
- else:
1410
- loader = TextLoader(path)
1411
- docs = loader.load()
1412
- # 若成功讀取到內容,建立檢索器
1413
- if docs:
1414
- chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(docs)
1415
- db = FAISS.from_documents(chunks, embeddings)
1416
- retriever = db.as_retriever()
1417
- # 將建立好的 retriever設定到全域 session_retriever,
1418
- # 以供 Summarisation 或 Document QA 任務使用
1419
- global session_retriever
1420
- session_retriever = retriever
1421
- else:
1422
- print("Loader did not return any document content from:", path)
1423
  else:
1424
- print("get_file_path returned None for the uploaded file.")
 
 
 
 
 
 
 
 
1425
  else:
1426
- print("No file uploaded.")
1427
-
 
 
 
 
 
1428
  # 建立 LangGraph 流程
1429
  graph = build_langgraph_pipeline()
1430
- # state 中包含查詢與檔案名稱(若有)
1431
  state = {"query": query, "file_names": file_names}
1432
- # 如果有 retriever,放入 state(下游任務可檢查 state 中的 retriever)
1433
  if retriever is not None:
1434
  state["retriever"] = retriever
1435
-
1436
  result = graph.invoke(state)
1437
  if "answer" in result:
1438
  return result["answer"]
@@ -1589,7 +1585,7 @@ demo = gr.TabbedInterface(
1589
  fn=langgraph_tab6_main,
1590
  inputs=[
1591
  gr.Textbox(label="Ask anything"),
1592
- gr.File(label="Upload one or more files", file_types=[".pdf", ".txt", ".docx"], file_count="single")
1593
  ],
1594
  outputs="text",
1595
  title="LangGraph GPT-like QA (Tab6)",
 
1390
  # 修改後的 Tab6 主執行函式
1391
  def langgraph_tab6_main(query: str, file=None):
1392
  try:
1393
+ # 以多檔案模式處理上傳檔案
1394
+ files = file if isinstance(file, list) else [file] if file else []
1395
+ all_docs, file_names = [], []
1396
+ for f in files:
1397
+ # 使用原有的 get_file_path 函式
1398
+ path = get_file_path(f)
1399
+ if not path:
1400
+ print("get_file_path returned None for file:", f)
1401
+ continue
1402
+ file_names.append(os.path.basename(path))
1403
+ print("Processing file:", path)
1404
+ # 根據副檔名選擇 Loader
1405
+ if path.lower().endswith(".pdf"):
1406
+ loader = PyPDFLoader(path)
1407
+ elif path.lower().endswith(".docx"):
1408
+ loader = UnstructuredWordDocumentLoader(path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1409
  else:
1410
+ loader = TextLoader(path)
1411
+ docs = loader.load()
1412
+ print("Docs loaded from", path, ":", docs)
1413
+ all_docs.extend(docs)
1414
+
1415
+ # 建立檢索器,合併所有檔案內容
1416
+ if not all_docs:
1417
+ print("No document content read. file_names:", file_names)
1418
+ retriever = None
1419
  else:
1420
+ chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(all_docs)
1421
+ db = FAISS.from_documents(chunks, embeddings)
1422
+ retriever = db.as_retriever()
1423
+ # 設定全域 session_retriever,供文件相關工具使用
1424
+ global session_retriever
1425
+ session_retriever = retriever
1426
+
1427
  # 建立 LangGraph 流程
1428
  graph = build_langgraph_pipeline()
 
1429
  state = {"query": query, "file_names": file_names}
 
1430
  if retriever is not None:
1431
  state["retriever"] = retriever
 
1432
  result = graph.invoke(state)
1433
  if "answer" in result:
1434
  return result["answer"]
 
1585
  fn=langgraph_tab6_main,
1586
  inputs=[
1587
  gr.Textbox(label="Ask anything"),
1588
+ gr.File(label="Upload one or more files", file_types=[".pdf", ".txt", ".docx"], file_count="multiple")
1589
  ],
1590
  outputs="text",
1591
  title="LangGraph GPT-like QA (Tab6)",