ss900371tw committed on
Commit
84006ad
·
verified ·
1 Parent(s): bcb1b3e

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +76 -74
src/streamlit_app.py CHANGED
@@ -32,7 +32,7 @@ except ImportError:
32
  # --- 頁面設定 ---
33
  st.set_page_config(page_title="Cybersecurity AI Assistant (Hugging Face RAG & Batch Analysis)", page_icon="🛡️", layout="wide")
34
  st.title("🛡️ Meta-Llama-3-8B-Instruct with FAISS RAG & Batch Analysis (Inference Client)")
35
- st.markdown("已啟用:**IndexFlatIP** + **L2 正規化** + **Hugging Face Inference Client (API)**。支援 JSON/CSV/TXT 執行批量分析。")
36
 
37
  # --- Streamlit Session State 初始化 (保持不變) ---
38
  if 'execute_batch_analysis' not in st.session_state:
@@ -49,22 +49,19 @@ if 'json_data_for_batch' not in st.session_state:
49
  st.session_state.json_data_for_batch = None
50
 
51
  # 設定模型 ID
52
- MODEL_ID = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
53
  WINDOW_SIZE = 8
54
 
55
- # === 核心檔案轉換函式 (CSV/TXT -> JSON List) ===
56
  def convert_csv_txt_to_json_list(file_content: bytes, file_type: str) -> List[Dict[str, Any]]:
57
  """
58
- 將 CSV 或 TXT 檔案內容 (假定為 CSV 格式,含標頭) 轉換為 JSON 物件列表。
59
-
60
  Args:
61
  file_content (bytes): 上傳檔案的二進位內容。
62
- file_type (str): 檔案類型 ('csv' 或 'txt')。
63
-
64
  Returns:
65
  List[Dict[str, Any]]: 轉換後的 JSON 物件列表。
66
  """
67
- # 這裡我們使用 decode("utf-8") 來處理內容
68
  log_content = file_content.decode("utf-8").strip()
69
  if not log_content:
70
  return []
@@ -72,49 +69,54 @@ def convert_csv_txt_to_json_list(file_content: bytes, file_type: str) -> List[Di
72
  # 使用 StringIO 讓 csv 模組可以處理字串內容
73
  string_io = io.StringIO(log_content)
74
 
75
- # 使用 csv.DictReader 自動將第一行視為 Key
76
- # 如果 TXT 內容是 JSON,這裡會出錯,但在 Streamlit 上傳區塊會處理
77
- reader = csv.DictReader(string_io)
78
-
 
 
 
 
 
79
  json_data = []
80
-
81
- # 定義需要轉換為數字的欄位名稱 (可根據您的需求擴充)
82
- numeric_fields = ['sc-status', 'time-taken', 'bytes', 'resp-len', 'req-size']
83
-
84
- for row in reader:
85
- record = {}
86
- for key, value in row.items():
87
- key = key.strip() # 清理 key
88
- value = value.strip() # 清理 value
89
-
90
- # 數字轉換
91
- if key in numeric_fields:
92
- try:
93
- # 嘗試轉換為整數 (如果有小數點,int() 會拋出錯誤)
94
- record[key] = int(value)
95
- except ValueError:
96
  try:
97
- # 嘗試轉換為浮點數
98
- record[key] = float(value)
99
  except ValueError:
100
- # 轉換失敗則保持為字串
101
- record[key] = value
102
- else:
103
- record[key] = value
104
-
105
- if record: # 確保不是空紀錄
106
- json_data.append(record)
107
 
108
- if not json_data and file_type == 'txt':
109
- # 如果 csv.DictReader 失敗,嘗試將 TXT 視為每行一個原始 Log
110
- # (作為備用選項,類似您原始的 'raw_log_entry' 邏輯,但更簡化)
 
 
 
111
  string_io.seek(0)
112
  lines = string_io.readlines()
113
- if len(lines) > 0 and len(lines) <= 2: # 判斷是否為小文件,更可能不是標準 CSV/JSON
114
  return [{"raw_log_entry": line.strip()} for line in lines if line.strip()]
115
-
116
  return json_data
117
 
 
118
  def convert_uploaded_file_to_json_list(uploaded_file) -> List[Dict[str, Any]]:
119
  """根據檔案類型,將上傳的檔案內容轉換為 Log JSON 列表。"""
120
  file_bytes = uploaded_file.getvalue()
@@ -140,10 +142,11 @@ def convert_uploaded_file_to_json_list(uploaded_file) -> List[Dict[str, Any]]:
140
  else:
141
  raise ValueError("JSON 檔案格式不支援 (非 List 或 Dict)。")
142
 
143
- # --- Case 2 & 3: CSV/TXT ---
144
- elif file_name_lower.endswith(('.csv', '.txt')):
145
- # 假設 CSV/TXT 都是逗號分隔格式 (含標頭)
146
- return convert_csv_txt_to_json_list(file_bytes, 'csv' if file_name_lower.endswith('.csv') else 'txt')
 
147
 
148
  else:
149
  raise ValueError("不支援的檔案類型。")
@@ -163,15 +166,15 @@ with st.sidebar:
163
  # === 1. 批量分析檔案 (支援多種格式) ===
164
  batch_uploaded_file = st.file_uploader(
165
  "1️⃣ 上傳 **Log/Alert 檔案** (用於批量分析)",
166
- type=['json', 'csv', 'txt'],
167
  key="batch_uploader",
168
- help="支援 JSON (Array), CSV (含標題), TXT (視為 CSV 或每行一個 Log)"
169
  )
170
 
171
  # === 2. RAG 知識庫檔案 ===
172
  rag_uploaded_file = st.file_uploader(
173
  "2️⃣ 上傳 **RAG 參考知識庫** (Logs/PDF/Code 等)",
174
- type=['txt', 'py', 'log', 'csv', 'md', 'pdf'],
175
  key="rag_uploader"
176
  )
177
  st.divider()
@@ -210,7 +213,7 @@ with st.sidebar:
210
  if st.button("🗑️ 清除所有紀錄"):
211
  for key in list(st.session_state.keys()):
212
  # 排除 HF_TOKEN,如果它在 session_state 中
213
- if key != 'HF_TOKEN':
214
  del st.session_state[key]
215
  st.rerun()
216
 
@@ -238,7 +241,6 @@ elif not os.environ.get("HF_TOKEN"):
238
 
239
  # === Embedding 模型 (保持不變) ===
240
  @st.cache_resource
241
-
242
  def load_embedding_model():
243
  model_kwargs = {'device': 'cpu', 'trust_remote_code': True}
244
  encode_kwargs = {'normalize_embeddings': False}
@@ -260,25 +262,25 @@ def process_file_to_faiss(uploaded_file):
260
  else:
261
  stringio = io.StringIO(uploaded_file.getvalue().decode("utf-8"))
262
  text_content = stringio.read()
263
-
264
  if not text_content.strip(): return None, "File is empty"
265
-
266
  events = [line for line in text_content.splitlines() if line.strip()]
267
  docs = [Document(page_content=e) for e in events]
268
  if not docs: return None, "No documents created"
269
-
270
  embeddings = embedding_model.embed_documents([d.page_content for d in docs])
271
  embeddings_np = np.array(embeddings).astype("float32")
272
  faiss.normalize_L2(embeddings_np)
273
-
274
  dimension = embeddings_np.shape[1]
275
  index = faiss.IndexFlatIP(dimension)
276
  index.add(embeddings_np)
277
-
278
  doc_ids = [str(uuid.uuid4()) for _ in range(len(docs))]
279
  docstore = InMemoryDocstore({_id: doc for _id, doc in zip(doc_ids, docs)})
280
  index_to_docstore_id = {i: _id for i, _id in enumerate(doc_ids)}
281
-
282
  vector_store = FAISS(embedding_function=embedding_model, index=index, docstore=docstore, index_to_docstore_id=index_to_docstore_id, distance_strategy=DistanceStrategy.COSINE)
283
  return vector_store, f"{len(docs)} chunks created."
284
  except Exception as e:
@@ -309,15 +311,15 @@ def generate_rag_response_hf_for_log(client, model_id, log_sequence_text, user_p
309
  if selected:
310
  retrieved_contents = [f"--- Reference Chunk (sim={score:.3f}) ---\n{doc.page_content}" for i, (doc, score) in enumerate(selected[:5])]
311
  context_text = "\n".join(retrieved_contents)
312
-
313
  rag_instruction = f"""=== RETRIEVED REFERENCE CONTEXT (Cosine ≥ {threshold}) ==={context_text if context_text else 'No relevant reference context found.'}=== END REFERENCE CONTEXT ===\nANALYSIS INSTRUCTION: {user_prompt}\nBased on the provided LOG SEQUENCE and REFERENCE CONTEXT, you must analyze the **entire sequence** to detect any continuous attack chains or evolving threats."""
314
  log_content_section = f"""=== CURRENT LOG SEQUENCE TO ANALYZE (Window Size: {WINDOW_SIZE}) ===\n{log_sequence_text}\n=== END LOG SEQUENCE ==="""
315
-
316
  messages = [
317
  {"role": "system", "content": sys_prompt},
318
  {"role": "user", "content": f"{rag_instruction}\n\n{log_content_section}"}
319
  ]
320
-
321
  try:
322
  response_stream = client.chat_completion(messages, max_tokens=max_output_tokens, temperature=temperature, top_p=top_p, stream=False)
323
  if response_stream and response_stream.choices:
@@ -345,7 +347,7 @@ elif 'vector_store' in st.session_state:
345
  # === 檔案處理區塊 (批量分析檔案 - **優化重寫** ) ===
346
  if batch_uploaded_file:
347
  batch_file_key = f"batch_{batch_uploaded_file.name}_{batch_uploaded_file.size}"
348
-
349
  if st.session_state.batch_current_file_key != batch_file_key or 'json_data_for_batch' not in st.session_state:
350
  try:
351
  # 使用新的統一解析函式
@@ -358,7 +360,7 @@ if batch_uploaded_file:
358
  st.session_state.json_data_for_batch = parsed_data
359
  st.session_state.batch_current_file_key = batch_file_key
360
  st.toast(f"檔案已解析並轉換為 {len(parsed_data)} 個 Log 條目。", icon="✅")
361
-
362
  except Exception as e:
363
  st.error(f"檔案解析錯誤: {e}")
364
  if 'json_data_for_batch' in st.session_state:
@@ -375,20 +377,20 @@ if st.session_state.execute_batch_analysis and 'json_data_for_batch' in st.sessi
375
  st.session_state.execute_batch_analysis = False
376
  start_time = time.time()
377
  st.session_state.batch_results = []
378
-
379
  if inference_client is None:
380
  st.error("Client 未連線,無法執行。")
381
  else:
382
  # 在新的邏輯中,st.session_state.json_data_for_batch 已經是一個 List[Dict]
383
  logs_list = st.session_state.json_data_for_batch
384
-
385
  if logs_list:
386
  vs = st.session_state.get("vector_store", None)
387
-
388
  # --- 關鍵:在這裡做 JSON String 的轉換 ---
389
  # 確保 Prompt 收到的永遠是 JSON 格式的文字
390
  formatted_logs = [json.dumps(log, indent=2, ensure_ascii=False) for log in logs_list]
391
-
392
  analysis_sequences = []
393
  for i in range(len(formatted_logs)):
394
  start_index = max(0, i - WINDOW_SIZE + 1)
@@ -403,17 +405,17 @@ if st.session_state.execute_batch_analysis and 'json_data_for_batch' in st.sessi
403
  "target_log_id": i + 1,
404
  "original_log_entry": logs_list[i]
405
  })
406
-
407
  total_sequences = len(analysis_sequences)
408
  st.header(f"⚡ 批量分析執行中 (平移視窗 $N={WINDOW_SIZE}$)...")
409
  progress_bar = st.progress(0, text=f"準備處理 {total_sequences} 個序列...")
410
  results_container = st.container()
411
  full_report_chunks = ["## Cybersecurity Batch Analysis Report\n\n"]
412
-
413
  for i, seq_data in enumerate(analysis_sequences):
414
  log_id = seq_data["target_log_id"]
415
  progress_bar.progress((i + 1) / total_sequences, text=f"Processing {i + 1}/{total_sequences} (Log #{log_id})...")
416
-
417
  try:
418
  response, retrieved_ctx = generate_rag_response_hf_for_log(
419
  client=inference_client,
@@ -435,26 +437,26 @@ if st.session_state.execute_batch_analysis and 'json_data_for_batch' in st.sessi
435
  "context": retrieved_ctx
436
  }
437
  st.session_state.batch_results.append(item)
438
-
439
  with results_container:
440
  st.subheader(f"Log/Alert #{item['log_id']}")
441
  with st.expander("序列內容 (JSON Format)"):
442
  # 這裡顯示的會是 JSON 格式的 Log Sequence
443
  st.code(item["sequence_analyzed"], language='json')
444
-
445
  is_high = any(x in response.lower() for x in ['high-risk detected'])
446
  if is_high: st.error(item['analysis_result'])
447
  else: st.info(item['analysis_result'])
448
  if item['context']:
449
  with st.expander("參考 RAG 片段"): st.code(item['context'])
450
  st.markdown("---")
451
-
452
  log_content_str_for_report = json.dumps(item["log_content"], indent=2, ensure_ascii=False).replace("`", "\\`")
453
  full_report_chunks.append(f"---\n\n### Log #{item['log_id']}\n```json\n{log_content_str_for_report}\n```\nResult:\n{item['analysis_result']}\n")
454
-
455
  except Exception as e:
456
  st.error(f"Error Log {log_id}: {e}")
457
-
458
  end_time = time.time()
459
  progress_bar.empty()
460
  st.success(f"完成!耗時 {end_time - start_time:.2f} 秒。")
 
32
  # --- 頁面設定 ---
33
  st.set_page_config(page_title="Cybersecurity AI Assistant (Hugging Face RAG & Batch Analysis)", page_icon="🛡️", layout="wide")
34
  st.title("🛡️ Meta-Llama-3-8B-Instruct with FAISS RAG & Batch Analysis (Inference Client)")
35
+ st.markdown("已啟用:**IndexFlatIP** + **L2 正規化** + **Hugging Face Inference Client (API)**。支援 JSON/CSV/TXT/**LOG** 執行批量分析。") # <--- 這裡更新了說明
36
 
37
  # --- Streamlit Session State 初始化 (保持不變) ---
38
  if 'execute_batch_analysis' not in st.session_state:
 
49
  st.session_state.json_data_for_batch = None
50
 
51
  # 設定模型 ID
52
+ MODEL_ID = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
53
  WINDOW_SIZE = 8
54
 
55
+ # === 核心檔案轉換函式 (CSV/TXT/LOG -> JSON List) ===
56
  def convert_csv_txt_to_json_list(file_content: bytes, file_type: str) -> List[Dict[str, Any]]:
57
  """
58
+ 將 CSV、TXT 或 LOG 檔案內容 (假定為 CSV 格式,含標頭) 轉換為 JSON 物件列表。
 
59
  Args:
60
  file_content (bytes): 上傳檔案的二進位內容。
61
+ file_type (str): 檔案類型 ('csv', 'txt', 或 'log')。
 
62
  Returns:
63
  List[Dict[str, Any]]: 轉換後的 JSON 物件列表。
64
  """
 
65
  log_content = file_content.decode("utf-8").strip()
66
  if not log_content:
67
  return []
 
69
  # 使用 StringIO 讓 csv 模組可以處理字串內容
70
  string_io = io.StringIO(log_content)
71
 
72
+ # 嘗試使用 csv.DictReader 自動將第一行視為 Key
73
+ try:
74
+ reader = csv.DictReader(string_io)
75
+ except Exception as e:
76
+ # 如果檔案內容不是標準 CSV (例如純粹的無標頭 LOG 條目),csv.DictReader 可能會失敗
77
+ # 這裡的 fallback 邏輯將會處理
78
+ st.warning(f"使用 csv.DictReader 失敗,嘗試將檔案視為每行一個原始 Log 條目: {e}")
79
+ reader = None
80
+
81
  json_data = []
82
+
83
+ if reader:
84
+ # 定義需要轉換為數字的欄位名稱 (可根據您的需求擴充)
85
+ numeric_fields = ['sc-status', 'time-taken', 'bytes', 'resp-len', 'req-size']
86
+
87
+ for row in reader:
88
+ record = {}
89
+ for key, value in row.items():
90
+ if key is None: continue # 跳過沒有標頭的欄位
91
+ key = key.strip() # 清理 key
92
+ value = value.strip() if value else "" # 清理 value (None 視為空字串)
93
+
94
+ # 處理數字轉換
95
+ if key in numeric_fields:
 
 
96
  try:
97
+ record[key] = int(value)
 
98
  except ValueError:
99
+ try:
100
+ record[key] = float(value)
101
+ except ValueError:
102
+ record[key] = value
103
+ else:
104
+ record[key] = value
 
105
 
106
+ if record: # 確保不是空紀錄
107
+ json_data.append(record)
108
+
109
+ # Fallback: 如果 csv.DictReader 失敗或沒有產生結果 (例如檔案是純 Log,沒有標準 CSV 標頭)
110
+ if not json_data:
111
+ # 嘗試將檔案視為每行一個原始 Log 條目
112
  string_io.seek(0)
113
  lines = string_io.readlines()
114
+ if len(lines) > 0:
115
  return [{"raw_log_entry": line.strip()} for line in lines if line.strip()]
116
+
117
  return json_data
118
 
119
+
120
  def convert_uploaded_file_to_json_list(uploaded_file) -> List[Dict[str, Any]]:
121
  """根據檔案類型,將上傳的檔案內容轉換為 Log JSON 列表。"""
122
  file_bytes = uploaded_file.getvalue()
 
142
  else:
143
  raise ValueError("JSON 檔案格式不支援 (非 List 或 Dict)。")
144
 
145
+ # --- Case 2, 3, & 4: CSV/TXT/LOG --- <--- 這裡增加了 .log
146
+ elif file_name_lower.endswith(('.csv', '.txt', '.log')):
147
+ # 假設 CSV/TXT/LOG 都是逗號分隔格式 (含標頭) 或每行一個原始 Log
148
+ file_type = 'csv' if file_name_lower.endswith('.csv') else ('log' if file_name_lower.endswith('.log') else 'txt')
149
+ return convert_csv_txt_to_json_list(file_bytes, file_type)
150
 
151
  else:
152
  raise ValueError("不支援的檔案類型。")
 
166
  # === 1. 批量分析檔案 (支援多種格式) ===
167
  batch_uploaded_file = st.file_uploader(
168
  "1️⃣ 上傳 **Log/Alert 檔案** (用於批量分析)",
169
+ type=['json', 'csv', 'txt', 'log'], # <--- 這裡增加了 'log'
170
  key="batch_uploader",
171
+ help="支援 JSON (Array), CSV (含標題), TXT/LOG (視為 CSV 或每行一個 Log)"
172
  )
173
 
174
  # === 2. RAG 知識庫檔案 ===
175
  rag_uploaded_file = st.file_uploader(
176
  "2️⃣ 上傳 **RAG 參考知識庫** (Logs/PDF/Code 等)",
177
+ type=['txt', 'py', 'log', 'csv', 'md', 'pdf'], # <--- 這裡增加了 'log'
178
  key="rag_uploader"
179
  )
180
  st.divider()
 
213
  if st.button("🗑️ 清除所有紀錄"):
214
  for key in list(st.session_state.keys()):
215
  # 排除 HF_TOKEN,如果它在 session_state 中
216
+ if key != 'HF_TOKEN':
217
  del st.session_state[key]
218
  st.rerun()
219
 
 
241
 
242
  # === Embedding 模型 (保持不變) ===
243
  @st.cache_resource
 
244
  def load_embedding_model():
245
  model_kwargs = {'device': 'cpu', 'trust_remote_code': True}
246
  encode_kwargs = {'normalize_embeddings': False}
 
262
  else:
263
  stringio = io.StringIO(uploaded_file.getvalue().decode("utf-8"))
264
  text_content = stringio.read()
265
+
266
  if not text_content.strip(): return None, "File is empty"
267
+
268
  events = [line for line in text_content.splitlines() if line.strip()]
269
  docs = [Document(page_content=e) for e in events]
270
  if not docs: return None, "No documents created"
271
+
272
  embeddings = embedding_model.embed_documents([d.page_content for d in docs])
273
  embeddings_np = np.array(embeddings).astype("float32")
274
  faiss.normalize_L2(embeddings_np)
275
+
276
  dimension = embeddings_np.shape[1]
277
  index = faiss.IndexFlatIP(dimension)
278
  index.add(embeddings_np)
279
+
280
  doc_ids = [str(uuid.uuid4()) for _ in range(len(docs))]
281
  docstore = InMemoryDocstore({_id: doc for _id, doc in zip(doc_ids, docs)})
282
  index_to_docstore_id = {i: _id for i, _id in enumerate(doc_ids)}
283
+
284
  vector_store = FAISS(embedding_function=embedding_model, index=index, docstore=docstore, index_to_docstore_id=index_to_docstore_id, distance_strategy=DistanceStrategy.COSINE)
285
  return vector_store, f"{len(docs)} chunks created."
286
  except Exception as e:
 
311
  if selected:
312
  retrieved_contents = [f"--- Reference Chunk (sim={score:.3f}) ---\n{doc.page_content}" for i, (doc, score) in enumerate(selected[:5])]
313
  context_text = "\n".join(retrieved_contents)
314
+
315
  rag_instruction = f"""=== RETRIEVED REFERENCE CONTEXT (Cosine ≥ {threshold}) ==={context_text if context_text else 'No relevant reference context found.'}=== END REFERENCE CONTEXT ===\nANALYSIS INSTRUCTION: {user_prompt}\nBased on the provided LOG SEQUENCE and REFERENCE CONTEXT, you must analyze the **entire sequence** to detect any continuous attack chains or evolving threats."""
316
  log_content_section = f"""=== CURRENT LOG SEQUENCE TO ANALYZE (Window Size: {WINDOW_SIZE}) ===\n{log_sequence_text}\n=== END LOG SEQUENCE ==="""
317
+
318
  messages = [
319
  {"role": "system", "content": sys_prompt},
320
  {"role": "user", "content": f"{rag_instruction}\n\n{log_content_section}"}
321
  ]
322
+
323
  try:
324
  response_stream = client.chat_completion(messages, max_tokens=max_output_tokens, temperature=temperature, top_p=top_p, stream=False)
325
  if response_stream and response_stream.choices:
 
347
  # === 檔案處理區塊 (批量分析檔案 - **優化重寫** ) ===
348
  if batch_uploaded_file:
349
  batch_file_key = f"batch_{batch_uploaded_file.name}_{batch_uploaded_file.size}"
350
+
351
  if st.session_state.batch_current_file_key != batch_file_key or 'json_data_for_batch' not in st.session_state:
352
  try:
353
  # 使用新的統一解析函式
 
360
  st.session_state.json_data_for_batch = parsed_data
361
  st.session_state.batch_current_file_key = batch_file_key
362
  st.toast(f"檔案已解析並轉換為 {len(parsed_data)} 個 Log 條目。", icon="✅")
363
+
364
  except Exception as e:
365
  st.error(f"檔案解析錯誤: {e}")
366
  if 'json_data_for_batch' in st.session_state:
 
377
  st.session_state.execute_batch_analysis = False
378
  start_time = time.time()
379
  st.session_state.batch_results = []
380
+
381
  if inference_client is None:
382
  st.error("Client 未連線,無法執行。")
383
  else:
384
  # 在新的邏輯中,st.session_state.json_data_for_batch 已經是一個 List[Dict]
385
  logs_list = st.session_state.json_data_for_batch
386
+
387
  if logs_list:
388
  vs = st.session_state.get("vector_store", None)
389
+
390
  # --- 關鍵:在這裡做 JSON String 的轉換 ---
391
  # 確保 Prompt 收到的永遠是 JSON 格式的文字
392
  formatted_logs = [json.dumps(log, indent=2, ensure_ascii=False) for log in logs_list]
393
+
394
  analysis_sequences = []
395
  for i in range(len(formatted_logs)):
396
  start_index = max(0, i - WINDOW_SIZE + 1)
 
405
  "target_log_id": i + 1,
406
  "original_log_entry": logs_list[i]
407
  })
408
+
409
  total_sequences = len(analysis_sequences)
410
  st.header(f"⚡ 批量分析執行中 (平移視窗 $N={WINDOW_SIZE}$)...")
411
  progress_bar = st.progress(0, text=f"準備處理 {total_sequences} 個序列...")
412
  results_container = st.container()
413
  full_report_chunks = ["## Cybersecurity Batch Analysis Report\n\n"]
414
+
415
  for i, seq_data in enumerate(analysis_sequences):
416
  log_id = seq_data["target_log_id"]
417
  progress_bar.progress((i + 1) / total_sequences, text=f"Processing {i + 1}/{total_sequences} (Log #{log_id})...")
418
+
419
  try:
420
  response, retrieved_ctx = generate_rag_response_hf_for_log(
421
  client=inference_client,
 
437
  "context": retrieved_ctx
438
  }
439
  st.session_state.batch_results.append(item)
440
+
441
  with results_container:
442
  st.subheader(f"Log/Alert #{item['log_id']}")
443
  with st.expander("序列內容 (JSON Format)"):
444
  # 這裡顯示的會是 JSON 格式的 Log Sequence
445
  st.code(item["sequence_analyzed"], language='json')
446
+
447
  is_high = any(x in response.lower() for x in ['high-risk detected'])
448
  if is_high: st.error(item['analysis_result'])
449
  else: st.info(item['analysis_result'])
450
  if item['context']:
451
  with st.expander("參考 RAG 片段"): st.code(item['context'])
452
  st.markdown("---")
453
+
454
  log_content_str_for_report = json.dumps(item["log_content"], indent=2, ensure_ascii=False).replace("`", "\\`")
455
  full_report_chunks.append(f"---\n\n### Log #{item['log_id']}\n```json\n{log_content_str_for_report}\n```\nResult:\n{item['analysis_result']}\n")
456
+
457
  except Exception as e:
458
  st.error(f"Error Log {log_id}: {e}")
459
+
460
  end_time = time.time()
461
  progress_bar.empty()
462
  st.success(f"完成!耗時 {end_time - start_time:.2f} 秒。")