Update src/streamlit_app.py
src/streamlit_app.py  CHANGED  (+96 −62)
@@ -1,15 +1,13 @@
-
 import streamlit as st
 import os
 import io
 import json
-import csv
+import csv
 import numpy as np
 import faiss
 import uuid
 import time
 import sys
-
 # === HuggingFace model packages (swapped to InferenceClient) ===
 try:
     from huggingface_hub import InferenceClient
@@ -22,8 +20,7 @@ from langchain_core.documents import Document
 from langchain_community.vectorstores import FAISS
 from langchain_community.vectorstores.utils import DistanceStrategy
 from langchain_community.docstore.in_memory import InMemoryDocstore
-
-# Try to import pypdf
+# Try to import pypdf
 try:
     import pypdf
 except ImportError:
@@ -37,14 +34,14 @@ st.markdown("已啟用:**IndexFlatIP** + **L2 正規化** + **Hugging Face Inf
 if 'execute_batch_analysis' not in st.session_state:
     st.session_state.execute_batch_analysis = False
 if 'batch_results' not in st.session_state:
-    st.session_state.batch_results = None
+    st.session_state.batch_results = None
 if 'rag_current_file_key' not in st.session_state:
     st.session_state.rag_current_file_key = None
-if 'batch_current_file_key' not in st.session_state:
+if 'batch_current_file_key' not in st.session_state:
     st.session_state.batch_current_file_key = None
 if 'vector_store' not in st.session_state:
     st.session_state.vector_store = None
-if 'json_data_for_batch' not in st.session_state:
+if 'json_data_for_batch' not in st.session_state:
     st.session_state.json_data_for_batch = None

 # Set the model ID
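The hunk above initializes six session keys one `if` guard at a time. For reference, the same defaults can be set in a loop with `st.session_state.setdefault`; a minimal sketch (the `defaults` dict is an illustrative refactor, not code from this commit):

```python
import streamlit as st

# Illustrative refactor of the per-key guards above; not part of the commit.
defaults = {
    "execute_batch_analysis": False,
    "batch_results": None,
    "rag_current_file_key": None,
    "batch_current_file_key": None,
    "vector_store": None,
    "json_data_for_batch": None,
}
for key, value in defaults.items():
    st.session_state.setdefault(key, value)  # sets only when the key is missing
```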
@@ -54,24 +51,23 @@ WINDOW_SIZE = 8
 # --- Sidebar settings ---
 with st.sidebar:
     st.header("⚙️ 設定")
-
+
     if not os.environ.get("HF_TOKEN"):
-
-
+        st.error("環境變數 **HF_TOKEN** 未設定。請設定後重新啟動應用程式。")
     st.info(f"LLM 模型:**{MODEL_ID}** (Hugging Face Inference API)")
     st.warning("⚠️ **注意**: 該模型使用 Inference API 呼叫,請確保您的 HF Token 具有存取權限。")
-
+
     st.divider()
     st.subheader("📂 檔案上傳")
-
+
     # === 1. Batch-analysis file (changed: now supports multiple formats) ===
     batch_uploaded_file = st.file_uploader(
         "1️⃣ 上傳 **Log/Alert 檔案** (用於批量分析)",
-        type=['json', 'csv', 'txt'],
+        type=['json', 'csv', 'txt'],
         key="batch_uploader",
         help="支援 JSON (Array), CSV (含標題), TXT (每行一條 Log)"
     )
-
+
     # === 2. RAG knowledge-base file ===
     rag_uploaded_file = st.file_uploader(
         "2️⃣ 上傳 **RAG 參考知識庫** (Logs/PDF/Code 等)",
@@ -81,6 +77,16 @@ with st.sidebar:

     st.divider()

+    # === TXT handling option (new) ===
+    st.subheader("📄 TXT 檔案處理")
+    txt_format_option = st.radio(
+        "TXT 內容轉換方式",
+        ["每行作為 `raw_log_entry` 的值", "忽略 (請確保您的 TXT 是有效的 JSON 陣列)"],
+        index=0,
+        help="選擇 TXT 檔案的每一行應如何轉換為 JSON 物件。"
+    )
+    st.divider()
+
     st.subheader("💡 批量分析指令")
     analysis_prompt = st.text_area(
         "針對每個 Log/Alert 執行的指令",
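The first radio option added above promises that every TXT line becomes the value of a `raw_log_entry` field. A tiny self-contained sketch of that mapping (the sample lines are invented for illustration):

```python
# Sample lines are invented; the mapping mirrors the first radio option.
lines = ["Failed password for root from 10.0.0.5", "", "Accepted publickey for admin"]
parsed_data = [{"raw_log_entry": line.strip()} for line in lines if line.strip()]
assert parsed_data == [
    {"raw_log_entry": "Failed password for root from 10.0.0.5"},
    {"raw_log_entry": "Accepted publickey for admin"},
]
```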
@@ -88,31 +94,33 @@ with st.sidebar:
         height=200
     )
     st.markdown("此指令將對檔案中的**每一個 Log 條目**執行一次獨立分析。")
-
-    if batch_uploaded_file:
+
+    if batch_uploaded_file:
         if st.button("🚀 執行批量分析"):
             if not os.environ.get("HF_TOKEN"):
                 st.error("無法執行,環境變數 **HF_TOKEN** 未設定。")
-            else:
+            else:
                 st.session_state.execute_batch_analysis = True
     else:
         st.info("請上傳 Log 檔案以啟用批量分析按鈕。")
-
+
     st.divider()
     st.subheader("🔍 RAG 檢索設定")
     similarity_threshold = st.slider("📐 Cosine Similarity 門檻", 0.0, 1.0, 0.4, 0.01)
-
+
     st.divider()
     st.subheader("模型參數")
     system_prompt = st.text_area("System Prompt", value="You are a Senior Security Analyst, named Ernest. You provide expert, authoritative, and concise advice on Information Security. Your analysis must be based strictly on the provided context.", height=100)
     max_output_tokens = st.slider("Max Output Tokens", 128, 4096, 2048, 128)
-    temperature = st.slider("Temperature", 0.0, 1.0, 0.1, 0.1)
+    temperature = st.slider("Temperature", 0.0, 1.0, 0.1, 0.1)
     top_p = st.slider("Top P", 0.1, 1.0, 0.95, 0.05)
-
+
     st.divider()
     if st.button("🗑️ 清除所有紀錄"):
         for key in list(st.session_state.keys()):
-            del st.session_state[key]
+            # Skip HF_TOKEN if it is present in session_state
+            if key != 'HF_TOKEN':
+                del st.session_state[key]
         st.rerun()

 # --- Initialize the Hugging Face LLM client ---
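The new guard keeps `HF_TOKEN` alive when the clear-all button wipes `st.session_state`. If more keys ever need protecting, a keep-set generalizes the same idea; a sketch (the `KEEP` set, English label, and widget key are illustrative, not part of the commit):

```python
import streamlit as st

KEEP = {"HF_TOKEN"}  # illustrative keep-set; the commit only special-cases HF_TOKEN

if st.button("🗑️ Clear all records", key="clear_all_sketch"):  # hypothetical widget
    for k in list(st.session_state.keys()):
        if k not in KEEP:
            del st.session_state[k]
    st.rerun()
```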
@@ -131,9 +139,10 @@ inference_client = None
 if os.environ.get("HF_TOKEN"):
     with st.spinner(f"正在連線到 Inference Client: {MODEL_ID}..."):
         inference_client = load_inference_client(MODEL_ID)
+
 if inference_client is None and os.environ.get("HF_TOKEN"):
     st.warning("Hugging Face Inference Client 無法連線。")
-elif not os.environ.get("HF_TOKEN"):
+elif not os.environ.get("HF_TOKEN"):
     st.error("請在環境變數中設定 HF_TOKEN。")

 # === Embedding model (unchanged) ===
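`load_inference_client` is defined outside the hunks shown here. Based on how it is called (returns `None` on failure, guarded by `HF_TOKEN`), it plausibly looks like the sketch below; the caching decorator and constructor arguments are assumptions, not code from this commit:

```python
import os
import streamlit as st
from huggingface_hub import InferenceClient

@st.cache_resource
def load_inference_client(model_id: str):
    # Assumed shape: return a client, or None so the caller's warning fires.
    try:
        return InferenceClient(model=model_id, token=os.environ["HF_TOKEN"])
    except Exception:
        return None
```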
@@ -148,6 +157,7 @@ with st.spinner("正在載入 Embedding 模型..."):

 # === Vector store / search functions (unchanged) ===
 def process_file_to_faiss(uploaded_file):
+    # ... (body unchanged: it processes RAG documents and is independent of the CSV/TXT batch-analysis logic)
     text_content = ""
     try:
         if uploaded_file.type == "application/pdf":
@@ -159,31 +169,32 @@ def process_file_to_faiss(uploaded_file):
         else:
             stringio = io.StringIO(uploaded_file.getvalue().decode("utf-8"))
             text_content = stringio.read()
-
+
         if not text_content.strip(): return None, "File is empty"
-
+
         events = [line for line in text_content.splitlines() if line.strip()]
         docs = [Document(page_content=e) for e in events]
         if not docs: return None, "No documents created"
-
+
         embeddings = embedding_model.embed_documents([d.page_content for d in docs])
         embeddings_np = np.array(embeddings).astype("float32")
         faiss.normalize_L2(embeddings_np)
-
+
         dimension = embeddings_np.shape[1]
         index = faiss.IndexFlatIP(dimension)
         index.add(embeddings_np)
-
+
         doc_ids = [str(uuid.uuid4()) for _ in range(len(docs))]
         docstore = InMemoryDocstore({_id: doc for _id, doc in zip(doc_ids, docs)})
         index_to_docstore_id = {i: _id for i, _id in enumerate(doc_ids)}
-
+
         vector_store = FAISS(embedding_function=embedding_model, index=index, docstore=docstore, index_to_docstore_id=index_to_docstore_id, distance_strategy=DistanceStrategy.COSINE)
         return vector_store, f"{len(docs)} chunks created."
     except Exception as e:
         return None, f"Error: {str(e)}"

 def faiss_cosine_search_all(vector_store, query, threshold):
+    # ... (body unchanged)
     q_emb = embedding_model.embed_query(query)
     q_emb = np.array([q_emb]).astype("float32")
     faiss.normalize_L2(q_emb)
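`process_file_to_faiss` builds an `IndexFlatIP` over L2-normalized embeddings, which is exactly what makes the inner-product scores behave as cosine similarities (the "IndexFlatIP + L2 normalization" combination the app banner advertises). A self-contained check of that identity:

```python
import numpy as np
import faiss

# Two 2-D vectors; after normalize_L2, inner product == cosine similarity.
vecs = np.array([[3.0, 4.0], [1.0, 0.0]], dtype="float32")
faiss.normalize_L2(vecs)
index = faiss.IndexFlatIP(vecs.shape[1])
index.add(vecs)

query = np.array([[6.0, 8.0]], dtype="float32")  # parallel to the first vector
faiss.normalize_L2(query)
scores, ids = index.search(query, k=2)
print(scores)  # [[1.0, 0.6]]: cosine 1.0 for the parallel vector, 0.6 for [1, 0]
```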
@@ -201,6 +212,7 @@ def faiss_cosine_search_all(vector_store, query, threshold):

 # === Hugging Face single-log analysis response (unchanged) ===
 def generate_rag_response_hf_for_log(client, model_id, log_sequence_text, user_prompt, sys_prompt, vector_store, threshold, max_output_tokens, temperature, top_p):
+    # ... (body unchanged)
     if client is None: return "ERROR: Client Error", ""
     context_text = ""
     if vector_store:
@@ -208,14 +220,15 @@
         if selected:
             retrieved_contents = [f"--- Reference Chunk (sim={score:.3f}) ---\n{doc.page_content}" for i, (doc, score) in enumerate(selected[:5])]
             context_text = "\n".join(retrieved_contents)
-
+
     rag_instruction = f"""=== RETRIEVED REFERENCE CONTEXT (Cosine ≥ {threshold}) ==={context_text if context_text else 'No relevant reference context found.'}=== END REFERENCE CONTEXT ===\nANALYSIS INSTRUCTION: {user_prompt}\nBased on the provided LOG SEQUENCE and REFERENCE CONTEXT, you must analyze the **entire sequence** to detect any continuous attack chains or evolving threats."""
     log_content_section = f"""=== CURRENT LOG SEQUENCE TO ANALYZE (Window Size: {WINDOW_SIZE}) ===\n{log_sequence_text}\n=== END LOG SEQUENCE ==="""
-
+
     messages = [
         {"role": "system", "content": sys_prompt},
         {"role": "user", "content": f"{rag_instruction}\n\n{log_content_section}"}
     ]
+
     try:
         response_stream = client.chat_completion(messages, max_tokens=max_output_tokens, temperature=temperature, top_p=top_p, stream=False)
         if response_stream and response_stream.choices:
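For reference, the `chat_completion` call above in isolation; the model ID below is a placeholder (the app's `MODEL_ID` is set outside the hunks shown), and the snippet needs a valid `HF_TOKEN` to actually run:

```python
import os
from huggingface_hub import InferenceClient

# Placeholder model ID; MODEL_ID in the app is defined outside this diff.
client = InferenceClient(model="meta-llama/Llama-3.1-8B-Instruct", token=os.environ.get("HF_TOKEN"))
messages = [
    {"role": "system", "content": "You are a Senior Security Analyst."},
    {"role": "user", "content": "Assess this log line: Failed password for root from 10.0.0.5"},
]
response = client.chat_completion(messages, max_tokens=256, temperature=0.1, top_p=0.95, stream=False)
print(response.choices[0].message.content)
```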
@@ -244,31 +257,51 @@ elif 'vector_store' in st.session_state:
 # Support JSON, CSV, and TXT; normalize everything to a list of dicts
 if batch_uploaded_file:
     batch_file_key = f"batch_{batch_uploaded_file.name}_{batch_uploaded_file.size}"
-
+
     if st.session_state.batch_current_file_key != batch_file_key or 'json_data_for_batch' not in st.session_state:
         try:
-
+            # io.BytesIO plus decode is required to handle CSV/TXT files correctly,
+            # and the buffer needs a rewind()
+            file_bytes = batch_uploaded_file.getvalue()
+            stringio = io.StringIO(file_bytes.decode("utf-8"))
             parsed_data = None

+            file_name_lower = batch_uploaded_file.name.lower()
+
             # --- Case 1: JSON ---
-            if
+            if file_name_lower.endswith('.json'):
                 parsed_data = json.load(stringio)
                 st.toast("JSON 檔案載入成功", icon="📄")
-
-            # --- Case 2: CSV ---
-            elif
-            #
+
+            # --- Case 2: CSV (fixed: use DictReader) ---
+            elif file_name_lower.endswith('.csv'):
+                # DictReader automatically treats the first row as the keys;
+                # file_bytes.decode() is required to guarantee correct encoding
+                stringio.seek(0)
                 reader = csv.DictReader(stringio)
                 parsed_data = list(reader)
-
-
-
+                if not parsed_data:
+                    raise ValueError("CSV 檔案載入失敗或內容為空。")
+                st.toast("CSV 檔案已轉換為 JSON 結構 (第一行為 Key)", icon="📊")
+
+            # --- Case 3: TXT (fixed: handled according to the radio option) ---
             else:  # Default: TXT
-
-
-
-
-
+                if txt_format_option == "每行作為 `raw_log_entry` 的值":
+                    stringio.seek(0)
+                    lines = stringio.readlines()
+                    # Wrap each line in a JSON object: {"raw_log_entry": "line text"}
+                    parsed_data = [{"raw_log_entry": line.strip()} for line in lines if line.strip()]
+                    st.toast("TXT 檔案已轉換為 JSON 結構 (每行為 raw_log_entry)", icon="📝")
+                else:
+                    # If the user chose to skip conversion, assume the TXT content itself is a valid JSON array or object
+                    stringio.seek(0)
+                    text_content = stringio.read().strip()
+                    if text_content:
+                        parsed_data = json.loads(text_content)
+                        st.toast("TXT 檔案已作為 JSON 載入", icon="📝")
+                    else:
+                        raise ValueError("TXT 檔案內容為空。")
+
             # Save the processed data
             st.session_state.json_data_for_batch = parsed_data
             st.session_state.batch_current_file_key = batch_file_key
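The three parsing branches above, condensed into a standalone function for reference; `parse_batch_file` and its `wrap_txt_lines` flag are illustrative names standing in for the radio option, not code from this commit:

```python
import csv
import io
import json

def parse_batch_file(name: str, data: bytes, wrap_txt_lines: bool = True):
    """Condensed, illustrative version of the three branches above."""
    text = data.decode("utf-8")
    if name.lower().endswith(".json"):
        return json.loads(text)
    if name.lower().endswith(".csv"):
        rows = list(csv.DictReader(io.StringIO(text)))  # first row becomes the keys
        if not rows:
            raise ValueError("CSV file is empty or failed to load")
        return rows
    if wrap_txt_lines:  # mirrors the "each line as raw_log_entry" radio option
        return [{"raw_log_entry": ln.strip()} for ln in text.splitlines() if ln.strip()]
    return json.loads(text)  # otherwise the TXT must itself be valid JSON

sample = b"timestamp,msg\n2024-01-01,login failed\n2024-01-02,login ok\n"
print(parse_batch_file("alerts.csv", sample))
# [{'timestamp': '2024-01-01', 'msg': 'login failed'}, {'timestamp': '2024-01-02', 'msg': 'login ok'}]
```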
@@ -285,18 +318,18 @@ elif 'json_data_for_batch' in st.session_state:
     del st.session_state.batch_results
     st.info("批量分析檔案已移除,已清除相關數據。")

-# === Batch-analysis execution logic ===
+# === Batch-analysis execution logic (unchanged: formatted_logs already converts dicts to JSON strings) ===
 if st.session_state.execute_batch_analysis and 'json_data_for_batch' in st.session_state:
     st.session_state.execute_batch_analysis = False
     start_time = time.time()
     st.session_state.batch_results = []
-
+
     if inference_client is None:
         st.error("Client 未連線,無法執行。")
     else:
         data_to_process = st.session_state.json_data_for_batch
         logs_list = []
-
+
         # Handle the different JSON structures (dict vs. list)
         if isinstance(data_to_process, list):
             logs_list = data_to_process
@@ -310,15 +343,15 @@ if st.session_state.execute_batch_analysis and 'json_data_for_batch' in st.sessi
             logs_list = [data_to_process]
         else:
             logs_list = [data_to_process]
-
+
         if logs_list:
             vs = st.session_state.get("vector_store", None)
-
+
             # --- Key step: the JSON-string conversion happens here ---
             # Whether the source was CSV (dict) or TXT (dict), json.dumps turns each entry into a string here,
             # which guarantees the prompt always receives JSON-formatted text
             formatted_logs = [json.dumps(log, indent=2, ensure_ascii=False) for log in logs_list]
-
+
             analysis_sequences = []
             for i in range(len(formatted_logs)):
                 start_index = max(0, i - WINDOW_SIZE + 1)
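The sliding window anchors each target log at the end of a window of at most `WINDOW_SIZE` entries. The slice itself falls outside the hunks shown, but given `start_index` it is presumably `formatted_logs[start_index : i + 1]`; a worked example under that assumption:

```python
# WINDOW_SIZE and start_index as in the commit; the slice is an assumption.
WINDOW_SIZE = 8
formatted_logs = [f"log-{n}" for n in range(12)]

for i in range(len(formatted_logs)):
    start_index = max(0, i - WINDOW_SIZE + 1)
    window = formatted_logs[start_index : i + 1]
    # i=0  -> ['log-0']                (window grows until it holds 8 entries)
    # i=11 -> ['log-4', ..., 'log-11'] (always ends at the target log)
```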
@@ -333,17 +366,17 @@ if st.session_state.execute_batch_analysis and 'json_data_for_batch' in st.sessi
                     "target_log_id": i + 1,
                     "original_log_entry": logs_list[i]
                 })
-
+
             total_sequences = len(analysis_sequences)
             st.header(f"⚡ 批量分析執行中 (平移視窗 $N={WINDOW_SIZE}$)...")
             progress_bar = st.progress(0, text=f"準備處理 {total_sequences} 個序列...")
             results_container = st.container()
             full_report_chunks = ["## Cybersecurity Batch Analysis Report\n\n"]
-
+
             for i, seq_data in enumerate(analysis_sequences):
                 log_id = seq_data["target_log_id"]
                 progress_bar.progress((i + 1) / total_sequences, text=f"Processing {i + 1}/{total_sequences} (Log #{log_id})...")
-
+
                 try:
                     response, retrieved_ctx = generate_rag_response_hf_for_log(
                         client=inference_client,
@@ -365,25 +398,26 @@ if st.session_state.execute_batch_analysis and 'json_data_for_batch' in st.sessi
                         "context": retrieved_ctx
                     }
                     st.session_state.batch_results.append(item)
-
+
                     with results_container:
                         st.subheader(f"Log/Alert #{item['log_id']}")
                         with st.expander("序列內容 (JSON Format)"):
-
-
+                            # What is shown here is the log sequence in JSON format
+                            st.code(item["sequence_analyzed"], language='json')
+
                         is_high = any(x in response.lower() for x in ['high risk'])
                         if is_high: st.error(item['analysis_result'])
                         else: st.info(item['analysis_result'])
                         if item['context']:
                             with st.expander("參考 RAG 片段"): st.code(item['context'])
                         st.markdown("---")
-
+
                     log_content_str_for_report = json.dumps(item["log_content"], indent=2, ensure_ascii=False).replace("`", "\\`")
                     full_report_chunks.append(f"---\n\n### Log #{item['log_id']}\n```json\n{log_content_str_for_report}\n```\nResult:\n{item['analysis_result']}\n")
-
+
                 except Exception as e:
                     st.error(f"Error Log {log_id}: {e}")
-
+
             end_time = time.time()
             progress_bar.empty()
             st.success(f"完成!耗時 {end_time - start_time:.2f} 秒。")
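`full_report_chunks` accumulates a Markdown report, and the code that ultimately serves it falls outside this diff. One plausible way to finish it; the `st.download_button` usage and label are assumptions about the surrounding app, not part of this commit:

```python
import streamlit as st

# Illustrative: two chunks standing in for the report built above.
full_report_chunks = ["## Cybersecurity Batch Analysis Report\n\n", "---\n\n### Log #1\n...\n"]
full_report = "".join(full_report_chunks)
st.download_button(
    "📥 Download report",               # hypothetical label
    data=full_report.encode("utf-8"),
    file_name="batch_analysis_report.md",
    mime="text/markdown",
)
```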