Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1364,11 +1364,17 @@ from tempfile import mkdtemp
|
|
| 1364 |
import os
|
| 1365 |
from tempfile import mkdtemp
|
| 1366 |
|
| 1367 |
-
|
|
|
|
|
|
|
|
|
|
| 1368 |
def get_file_path_tab6(file):
|
|
|
|
| 1369 |
if isinstance(file, str):
|
| 1370 |
return file
|
|
|
|
| 1371 |
elif isinstance(file, dict):
|
|
|
|
| 1372 |
if file.get("data"):
|
| 1373 |
temp_dir = mkdtemp()
|
| 1374 |
file_name = file.get("name", "uploaded_file")
|
|
@@ -1377,7 +1383,9 @@ def get_file_path_tab6(file):
|
|
| 1377 |
f.write(file["data"])
|
| 1378 |
return file_path
|
| 1379 |
else:
|
|
|
|
| 1380 |
return file.get("name")
|
|
|
|
| 1381 |
elif hasattr(file, "save"):
|
| 1382 |
temp_dir = mkdtemp()
|
| 1383 |
file_path = os.path.join(temp_dir, file.name)
|
|
@@ -1386,18 +1394,20 @@ def get_file_path_tab6(file):
|
|
| 1386 |
else:
|
| 1387 |
return None
|
| 1388 |
|
|
|
|
| 1389 |
def langgraph_tab6_main(query: str, file=None):
|
| 1390 |
try:
|
|
|
|
| 1391 |
files = file if isinstance(file, list) else [file] if file else []
|
| 1392 |
all_docs, file_names = [], []
|
| 1393 |
for f in files:
|
| 1394 |
-
# 使用專用於 Tab6 的函式
|
| 1395 |
-
path = get_file_path_tab6(f)
|
| 1396 |
if not path:
|
| 1397 |
print("get_file_path_tab6 returned None for file:", f)
|
| 1398 |
continue
|
| 1399 |
file_names.append(os.path.basename(path))
|
| 1400 |
print("Tab6 Processing file:", path)
|
|
|
|
| 1401 |
if path.lower().endswith(".pdf"):
|
| 1402 |
loader = PyPDFLoader(path)
|
| 1403 |
elif path.lower().endswith(".docx"):
|
|
@@ -1407,22 +1417,25 @@ def langgraph_tab6_main(query: str, file=None):
|
|
| 1407 |
docs = loader.load()
|
| 1408 |
print("Docs loaded from", path, ":", docs)
|
| 1409 |
all_docs.extend(docs)
|
| 1410 |
-
|
| 1411 |
-
|
| 1412 |
-
retriever = None
|
| 1413 |
-
else:
|
| 1414 |
chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(all_docs)
|
| 1415 |
db = FAISS.from_documents(chunks, embeddings)
|
| 1416 |
retriever = db.as_retriever()
|
| 1417 |
-
# 將建立好的 retriever 指派到全域變數 session_retriever,
|
| 1418 |
-
# 以便 Summarise 與 Document QA 工具內部可以檢查到有上傳文件
|
| 1419 |
global session_retriever
|
| 1420 |
session_retriever = retriever
|
| 1421 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1422 |
graph = build_langgraph_pipeline()
|
| 1423 |
state = {"query": query, "file_names": file_names}
|
|
|
|
| 1424 |
if retriever is not None:
|
| 1425 |
state["retriever"] = retriever
|
|
|
|
| 1426 |
result = graph.invoke(state)
|
| 1427 |
if "answer" in result:
|
| 1428 |
return result["answer"]
|
|
|
|
| 1364 |
import os
|
| 1365 |
from tempfile import mkdtemp
|
| 1366 |
|
| 1367 |
+
import os
|
| 1368 |
+
from tempfile import mkdtemp
|
| 1369 |
+
|
| 1370 |
+
# 專用於 Tab6 的檔案路徑取得函式(僅用於 Tab6,不影響其他地方)
|
| 1371 |
def get_file_path_tab6(file):
|
| 1372 |
+
# 如果檔案是字串,直接回傳
|
| 1373 |
if isinstance(file, str):
|
| 1374 |
return file
|
| 1375 |
+
# 如果檔案是字典型態(Gradio 上傳後通常為此格式)
|
| 1376 |
elif isinstance(file, dict):
|
| 1377 |
+
# 若有 "data" 欄位,表示有檔案內容(bytes),就寫入臨時檔案
|
| 1378 |
if file.get("data"):
|
| 1379 |
temp_dir = mkdtemp()
|
| 1380 |
file_name = file.get("name", "uploaded_file")
|
|
|
|
| 1383 |
f.write(file["data"])
|
| 1384 |
return file_path
|
| 1385 |
else:
|
| 1386 |
+
# 若無 "data",則直接回傳 "name" 欄位
|
| 1387 |
return file.get("name")
|
| 1388 |
+
# 如果是具備 save 方法的物件,則使用 save 儲存後回傳路徑
|
| 1389 |
elif hasattr(file, "save"):
|
| 1390 |
temp_dir = mkdtemp()
|
| 1391 |
file_path = os.path.join(temp_dir, file.name)
|
|
|
|
| 1394 |
else:
|
| 1395 |
return None
|
| 1396 |
|
| 1397 |
+
# 修改後的 Tab6 主執行函式
|
| 1398 |
def langgraph_tab6_main(query: str, file=None):
|
| 1399 |
try:
|
| 1400 |
+
# 先處理上傳檔案(支援多檔案上傳)
|
| 1401 |
files = file if isinstance(file, list) else [file] if file else []
|
| 1402 |
all_docs, file_names = [], []
|
| 1403 |
for f in files:
|
| 1404 |
+
path = get_file_path_tab6(f) # 使用專用於 Tab6 的函式
|
|
|
|
| 1405 |
if not path:
|
| 1406 |
print("get_file_path_tab6 returned None for file:", f)
|
| 1407 |
continue
|
| 1408 |
file_names.append(os.path.basename(path))
|
| 1409 |
print("Tab6 Processing file:", path)
|
| 1410 |
+
# 根據副檔名選擇 Loader
|
| 1411 |
if path.lower().endswith(".pdf"):
|
| 1412 |
loader = PyPDFLoader(path)
|
| 1413 |
elif path.lower().endswith(".docx"):
|
|
|
|
| 1417 |
docs = loader.load()
|
| 1418 |
print("Docs loaded from", path, ":", docs)
|
| 1419 |
all_docs.extend(docs)
|
| 1420 |
+
# 若成功讀取到文件內容,建立檢索器
|
| 1421 |
+
if all_docs:
|
|
|
|
|
|
|
| 1422 |
chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(all_docs)
|
| 1423 |
db = FAISS.from_documents(chunks, embeddings)
|
| 1424 |
retriever = db.as_retriever()
|
| 1425 |
+
# 將建立好的 retriever 指派到全域變數 session_retriever,供 Summarise/Document QA 工具使用
|
|
|
|
| 1426 |
global session_retriever
|
| 1427 |
session_retriever = retriever
|
| 1428 |
+
else:
|
| 1429 |
+
print("No document content read. file_names:", file_names)
|
| 1430 |
+
retriever = None
|
| 1431 |
+
|
| 1432 |
+
# 建立 LangGraph 流程
|
| 1433 |
graph = build_langgraph_pipeline()
|
| 1434 |
state = {"query": query, "file_names": file_names}
|
| 1435 |
+
# 若有 retriever,將其放入 state,讓下游工具(例如 Document QA 或 Summarise)能從 state 中取用
|
| 1436 |
if retriever is not None:
|
| 1437 |
state["retriever"] = retriever
|
| 1438 |
+
|
| 1439 |
result = graph.invoke(state)
|
| 1440 |
if "answer" in result:
|
| 1441 |
return result["answer"]
|