Spaces:

ChienChung
/

SmartRAG_Multi-Agent_Assistant

Runtime error

App Files Files Community

ChienChung committed on Apr 6, 2025

Commit

e8a83a0

verified ·

1 Parent(s): a3bc85b

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -5

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import requests
 import transformers
 import chardet
 import deepeval
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from transformers.models.llama.configuration_llama import LlamaConfig
 from huggingface_hub import hf_hub_download
@@ -1388,18 +1389,23 @@ def get_file_path_tab6(file):
         return None
 # 修改後的 Tab6 主執行函式
 def langgraph_tab6_main(query: str, file=None):
     try:
         # 以多檔案模式處理上傳的檔案
         files = file if isinstance(file, list) else [file] if file else []
-        all_docs, file_names = [], []
         for f in files:
-            path = get_file_path(f)  # 使用原有的 get_file_path
             if not path:
                 print("get_file_path returned None for file:", f)
                 continue
             file_names.append(os.path.basename(path))
             print("Processing file:", path)
             if path.lower().endswith(".pdf"):
                 loader = PyPDFLoader(path)
             elif path.lower().endswith(".docx"):
@@ -1408,8 +1414,29 @@ def langgraph_tab6_main(query: str, file=None):
                 loader = TextLoader(path)
             docs = loader.load()
             print("Docs loaded from", path, ":", docs)
             all_docs.extend(docs)
         if not all_docs:
             print("No document content read. file_names:", file_names)
             retriever = None
@@ -1417,10 +1444,8 @@ def langgraph_tab6_main(query: str, file=None):
             chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(all_docs)
             db = FAISS.from_documents(chunks, embeddings)
             retriever = db.as_retriever()
-            # 將建立好的 retriever 指派到全域變數 session_retriever
             global session_retriever
             session_retriever = retriever
-            # 同時建立 Document QA 的 ConversationalRetrievalChain，供 uploaded_qa_tool_func 使用
             global session_qa_chain
             session_qa_chain = ConversationalRetrievalChain.from_llm(
                 llm=llm_gpt4,
@@ -1440,7 +1465,6 @@ def langgraph_tab6_main(query: str, file=None):
         return "No answer."
     except Exception as e:
         return f"[Tab6 Error] {e}"
 # Gradio Interface Settings
 demo_description = """

 import transformers
 import chardet
 import deepeval
+import difflib
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from transformers.models.llama.configuration_llama import LlamaConfig
 from huggingface_hub import hf_hub_download
         return None
 # 修改後的 Tab6 主執行函式
 def langgraph_tab6_main(query: str, file=None):
     try:
         # 以多檔案模式處理上傳的檔案
         files = file if isinstance(file, list) else [file] if file else []
+        all_docs = []        # 用來合併所有檔案內文（後續用於建立檢索器）
+        file_names = []      # 儲存每份檔案的檔名
+        docs_by_file = []    # 儲存每份檔案的完整文字（分開保存）
         for f in files:
+            path = get_file_path(f)  # 使用現有的 get_file_path
             if not path:
                 print("get_file_path returned None for file:", f)
                 continue
             file_names.append(os.path.basename(path))
             print("Processing file:", path)
+            # 根據副檔名選擇對應 Loader
             if path.lower().endswith(".pdf"):
                 loader = PyPDFLoader(path)
             elif path.lower().endswith(".docx"):
                 loader = TextLoader(path)
             docs = loader.load()
             print("Docs loaded from", path, ":", docs)
+            # 將文件內容合併成單一文字（假設每個 doc 有 page_content 屬性，否則直接 join）
+            if docs and hasattr(docs[0], "page_content"):
+                text = "\n".join([doc.page_content for doc in docs])
+            else:
+                text = "\n".join(docs)
+            docs_by_file.append(text)
             all_docs.extend(docs)
+        # 如果上傳了兩份以上檔案，且查詢中包含 "differ"（例如 "difference", "different"）
+        if len(docs_by_file) >= 2 and "differ" in query.lower():
+            diff = difflib.unified_diff(
+                docs_by_file[0].splitlines(),
+                docs_by_file[1].splitlines(),
+                fromfile=file_names[0],
+                tofile=file_names[1],
+                lineterm=''
+            )
+            diff_text = "\n".join(list(diff))
+            if not diff_text.strip():
+                diff_text = "The two documents appear to be identical."
+            return diff_text
+        # 否則，採用合併所有檔案內文的方式建立檢索器
         if not all_docs:
             print("No document content read. file_names:", file_names)
             retriever = None
             chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(all_docs)
             db = FAISS.from_documents(chunks, embeddings)
             retriever = db.as_retriever()
             global session_retriever
             session_retriever = retriever
             global session_qa_chain
             session_qa_chain = ConversationalRetrievalChain.from_llm(
                 llm=llm_gpt4,
         return "No answer."
     except Exception as e:
         return f"[Tab6 Error] {e}"
 # Gradio Interface Settings
 demo_description = """