Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1452,128 +1452,137 @@ def build_langgraph_pipeline():
|
|
| 1452 |
from tempfile import mkdtemp
|
| 1453 |
|
| 1454 |
|
|
|
|
|
|
|
|
|
|
| 1455 |
def get_file_path_tab6(file):
|
| 1456 |
-
|
| 1457 |
-
|
| 1458 |
-
|
| 1459 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1460 |
return None
|
| 1461 |
-
|
| 1462 |
-
|
| 1463 |
-
|
| 1464 |
-
|
| 1465 |
-
|
| 1466 |
-
|
| 1467 |
-
|
| 1468 |
-
if
|
| 1469 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1470 |
return None
|
| 1471 |
-
|
| 1472 |
-
|
| 1473 |
-
|
| 1474 |
-
|
| 1475 |
-
|
| 1476 |
-
|
| 1477 |
-
|
| 1478 |
-
|
| 1479 |
-
|
| 1480 |
-
|
| 1481 |
-
|
| 1482 |
-
|
| 1483 |
-
|
| 1484 |
-
|
| 1485 |
-
|
| 1486 |
-
|
| 1487 |
-
|
| 1488 |
-
|
| 1489 |
-
return None
|
| 1490 |
-
except Exception as e:
|
| 1491 |
-
print(f"Error in get_file_path_tab6: {e}")
|
| 1492 |
return None
|
| 1493 |
|
| 1494 |
def langgraph_tab6_main(query: str, file=None):
|
| 1495 |
try:
|
| 1496 |
-
#
|
| 1497 |
files = file if isinstance(file, list) else [file] if file else []
|
| 1498 |
-
all_docs = []
|
| 1499 |
-
file_names = []
|
| 1500 |
-
docs_by_file = []
|
| 1501 |
|
| 1502 |
-
# 處理每個文件
|
| 1503 |
for f in files:
|
| 1504 |
-
|
| 1505 |
-
|
| 1506 |
-
|
| 1507 |
-
|
| 1508 |
-
|
| 1509 |
-
|
| 1510 |
-
|
| 1511 |
-
print(f"Attempting to process file: {path}")
|
| 1512 |
-
|
| 1513 |
-
# 根據文件類型選擇加載器
|
| 1514 |
-
if path.lower().endswith(".pdf"):
|
| 1515 |
-
from langchain.document_loaders import PyPDFLoader
|
| 1516 |
-
loader = PyPDFLoader(path)
|
| 1517 |
-
elif path.lower().endswith(".docx"):
|
| 1518 |
-
from langchain.document_loaders import UnstructuredWordDocumentLoader
|
| 1519 |
-
loader = UnstructuredWordDocumentLoader(path)
|
| 1520 |
-
else:
|
| 1521 |
-
from langchain.document_loaders import TextLoader
|
| 1522 |
-
loader = TextLoader(path)
|
| 1523 |
-
|
| 1524 |
-
# 加載文件
|
| 1525 |
-
docs = loader.load()
|
| 1526 |
-
if docs:
|
| 1527 |
-
file_names.append(os.path.basename(path))
|
| 1528 |
-
if hasattr(docs[0], "page_content"):
|
| 1529 |
-
text = "\n".join([doc.page_content for doc in docs])
|
| 1530 |
-
else:
|
| 1531 |
-
text = "\n".join(docs)
|
| 1532 |
-
docs_by_file.append(text)
|
| 1533 |
-
all_docs.extend(docs)
|
| 1534 |
-
print(f"Successfully processed file: {path}")
|
| 1535 |
-
except Exception as e:
|
| 1536 |
-
print(f"Error processing file {f}: {e}")
|
| 1537 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1538 |
|
| 1539 |
-
# 檢
|
| 1540 |
if not all_docs:
|
| 1541 |
-
|
| 1542 |
-
|
| 1543 |
-
|
| 1544 |
-
|
| 1545 |
-
|
| 1546 |
-
|
| 1547 |
-
|
| 1548 |
-
|
| 1549 |
-
|
| 1550 |
-
|
| 1551 |
-
|
| 1552 |
-
|
| 1553 |
-
|
| 1554 |
-
|
| 1555 |
-
)
|
| 1556 |
|
|
|
|
| 1557 |
parsed = parse_query(query)
|
| 1558 |
if (parsed.get("summarise") or parsed.get("compare")) and len(docs_by_file) > 0:
|
| 1559 |
final_answer = execute_multi_agent(parsed, docs_by_file, file_names)
|
| 1560 |
return final_answer
|
| 1561 |
|
|
|
|
| 1562 |
graph = build_langgraph_pipeline()
|
| 1563 |
state = {"query": query, "file_names": file_names}
|
| 1564 |
if retriever is not None:
|
| 1565 |
state["retriever"] = retriever
|
| 1566 |
-
|
| 1567 |
result = graph.invoke(state)
|
| 1568 |
if "answer" in result:
|
| 1569 |
return result["answer"]
|
| 1570 |
if "summary" in result:
|
| 1571 |
return result["summary"]
|
| 1572 |
return "No answer."
|
| 1573 |
-
|
| 1574 |
except Exception as e:
|
| 1575 |
-
|
| 1576 |
-
return f"[Tab6 Error] {str(e)}"
|
| 1577 |
|
| 1578 |
# Gradio Interface Settings
|
| 1579 |
demo_description = """
|
|
|
|
| 1452 |
from tempfile import mkdtemp
|
| 1453 |
|
| 1454 |
|
| 1455 |
+
from tempfile import mkdtemp
|
| 1456 |
+
import os
|
| 1457 |
+
|
| 1458 |
def get_file_path_tab6(file):
    """Resolve an uploaded-file object to a path on disk, or return None.

    Accepted inputs, checked in this order:

    * ``str`` -- returned unchanged if it names an existing path.
    * ``dict`` -- reads the ``"data"`` and ``"name"`` keys (presumably the
      Gradio upload payload; confirm against the caller). If ``data`` is a
      string naming an existing path it is returned; otherwise ``data`` is
      written (UTF-8 encoded when it is a ``str``) to a file in a fresh
      temporary directory. A dict without truthy ``data`` yields ``None``.
    * object with a ``save`` attribute -- ``file.save(path)`` is called with
      a path inside a fresh temporary directory.
    * any other object with a ``name`` attribute naming an existing path --
      that name is returned.

    The client-supplied name is reduced to its final path component before it
    is joined onto the temporary directory: ``os.path.join`` discards the
    directory when the second argument is absolute, and ``../`` components
    would otherwise escape it.

    Args:
        file: The upload object in one of the shapes listed above.

    Returns:
        A filesystem path for the upload, or ``None`` when the input is not
        recognised or does not resolve to an existing file.
    """
    # DEBUG: print the received file object.
    print("DEBUG: Received file object:", file)

    # A plain string must be the path of an existing file.
    if isinstance(file, str):
        if os.path.exists(file):
            return file
        print("DEBUG: String file path does not exist:", file)
        return None

    # Dictionary payload (format commonly seen after a Gradio upload).
    if isinstance(file, dict):
        data = file.get("data")
        name = file.get("name")
        print("DEBUG: File dict - name:", name, "data type:", type(data))
        if not data:
            # Without data, return None rather than an invalid file name.
            print("DEBUG: No data field in file dict")
            return None
        # If data is a string naming an existing path, return it directly.
        if isinstance(data, str) and os.path.exists(data):
            return data
        # Otherwise write the data into a temporary file.
        file_path = os.path.join(mkdtemp(), _safe_upload_name_tab6(name))
        payload = data.encode("utf-8") if isinstance(data, str) else data
        with open(file_path, "wb") as f:
            f.write(payload)
        # Check that the file was actually written.
        if os.path.exists(file_path):
            print("DEBUG: File successfully written to:", file_path)
            return file_path
        print("DEBUG: File not created at:", file_path)
        return None

    # Object exposing .save: save it into a temp directory and return the path.
    if hasattr(file, "save"):
        file_path = os.path.join(
            mkdtemp(), _safe_upload_name_tab6(getattr(file, "name", None))
        )
        file.save(file_path)
        if os.path.exists(file_path):
            print("DEBUG: File saved at:", file_path)
            return file_path
        print("DEBUG: File not saved at:", file_path)
        return None

    # Fallback: if the object has a .name attribute, try returning it.
    candidate = getattr(file, "name", None)
    # Guard the type: os.path.exists(None) raises TypeError.
    if isinstance(candidate, (str, os.PathLike)) and os.path.exists(candidate):
        return candidate
    print("DEBUG: File type not recognized.")
    return None


def _safe_upload_name_tab6(name, default="uploaded_file"):
    """Return the final path component of *name*, or *default* if it is empty."""
    base = os.path.basename(str(name)) if name else ""
    return base or default
|
| 1516 |
|
| 1517 |
def langgraph_tab6_main(query: str, file=None):
    """Answer *query* against the uploaded file(s) for Tab 6.

    Each upload is resolved with ``get_file_path_tab6``, loaded with a loader
    chosen by file extension (``.pdf``, ``.docx``, anything else as text), and
    collected both as one joined text per file and as a flat document list.
    When any documents were loaded, a FAISS retriever and a conversational QA
    chain are built and stored in the module globals ``session_retriever`` and
    ``session_qa_chain``.

    If ``parse_query`` flags the query as ``summarise`` or ``compare`` and at
    least one file was processed, the result of ``execute_multi_agent`` is
    returned. Otherwise the LangGraph pipeline is invoked and its ``answer``
    (or, failing that, ``summary``) is returned.

    A file that fails to load is logged and skipped so that one bad upload
    does not abort the whole request.

    NOTE(review): the loaders, ``FAISS``, ``embeddings``, ``llm_gpt4``,
    ``RecursiveCharacterTextSplitter``, ``ConversationalRetrievalChain`` and
    ``ConversationBufferMemory`` are assumed to be defined at module level
    elsewhere in this file -- confirm.

    Args:
        query: The user's question or instruction.
        file: A single upload object, a list of them, or ``None``.

    Returns:
        The answer string, ``"No answer."`` when the pipeline produced neither
        an answer nor a summary, or ``"[Tab6 Error] ..."`` if an exception
        escaped the processing below.
    """
    global session_retriever, session_qa_chain
    try:
        # Normalise the upload(s) to a list (multi-file mode).
        files = file if isinstance(file, list) else [file] if file else []
        all_docs = []      # flat document list used to build the retriever
        file_names = []    # base names of the processed files
        docs_by_file = []  # full text of each processed file

        for f in files:
            path = get_file_path_tab6(f)
            if not path:
                print("DEBUG: get_file_path_tab6 returned None for file:", f)
                continue
            if not os.path.exists(path):
                print("DEBUG: Returned path does not exist:", path)
                continue
            print("DEBUG: Processing file:", path)
            try:
                # Pick the loader from the file extension.
                lowered = path.lower()
                if lowered.endswith(".pdf"):
                    loader = PyPDFLoader(path)
                elif lowered.endswith(".docx"):
                    loader = UnstructuredWordDocumentLoader(path)
                else:
                    loader = TextLoader(path)
                docs = loader.load()
                print("DEBUG: Docs loaded from", path, ":", docs)
                if docs and hasattr(docs[0], "page_content"):
                    text = "\n".join([doc.page_content for doc in docs])
                else:
                    text = "\n".join(docs)
            except Exception as e:
                # Isolate per-file failures; keep processing the other files.
                print(f"Error processing file {f}: {e}")
                continue
            # Record the name only after a successful load so file_names
            # stays index-aligned with docs_by_file.
            file_names.append(os.path.basename(path))
            docs_by_file.append(text)
            all_docs.extend(docs)

        # Build the retriever (used by the non-multi-agent flow).
        if not all_docs:
            print("DEBUG: No valid document content read. file_names:", file_names)
            retriever = None
        else:
            chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(all_docs)
            db = FAISS.from_documents(chunks, embeddings)
            retriever = db.as_retriever()
            session_retriever = retriever
            session_qa_chain = ConversationalRetrievalChain.from_llm(
                llm=llm_gpt4,
                retriever=retriever,
                memory=ConversationBufferMemory(memory_key="chat_history", return_messages=True),
            )

        # Parse the query into sub-intents.
        parsed = parse_query(query)
        if (parsed.get("summarise") or parsed.get("compare")) and len(docs_by_file) > 0:
            final_answer = execute_multi_agent(parsed, docs_by_file, file_names)
            return final_answer

        # Otherwise fall through to the original LangGraph pipeline.
        graph = build_langgraph_pipeline()
        state = {"query": query, "file_names": file_names}
        if retriever is not None:
            state["retriever"] = retriever
        result = graph.invoke(state)
        if "answer" in result:
            return result["answer"]
        if "summary" in result:
            return result["summary"]
        return "No answer."
    except Exception as e:
        return f"[Tab6 Error] {e}"
|
|
|
|
| 1586 |
|
| 1587 |
# Gradio Interface Settings
|
| 1588 |
demo_description = """
|