Spaces:
Sleeping
Sleeping
| # app.py | |
| # 法遵自評覆核(Hugging Face Spaces 版) | |
| # - PDF 讀取:先用 PyMuPDF 擷取文字;若該頁無文字則以 PaddleOCR(lang="ch")進行 OCR(含繁體) | |
| # - LLM:本地 Hugging Face Transformers(預設 meta-llama/Meta-Llama-3.1-8B-Instruct),首次自動下載到 /data/hf | |
| # - 自評檢核規則:預設從 config/config.json 的 DEFAULT_RULES 載入(可在 UI 中編輯) | |
| # - 每個函式皆加上 try/except 與註解;重要步驟 print log 便於除錯 | |
| # - UI:內規多檔上傳(可預覽)、自評規則(必填)、自評文字或自評附檔(二擇一)、Debug Log 顯示切換 | |
| import os | |
| import io | |
| import re | |
| import json | |
| import time | |
| from typing import List, Dict, Tuple, Optional | |
| import gradio as gr | |
| import fitz # PyMuPDF | |
| import numpy as np | |
| from PIL import Image | |
# HF cache locations (persisted after the first model download).
os.environ.setdefault("TRANSFORMERS_CACHE", "/data/hf")
os.environ.setdefault("HF_HOME", "/data/hf")
# -----------------------------
# Configuration loading (with defaults and error handling)
# -----------------------------
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# NOTE(review): directory is "Config" here but the file header says
# "config/config.json" — verify the case on a case-sensitive filesystem.
CONFIG_PATH = os.path.join(BASE_DIR, "Config", "config.json")
# Fallback defaults (used when config.json is missing or unreadable).
_default_cfg = {
    "DEFAULT_RULES": "【請在此貼上你的檢核規則】",
    "DEMO_PROMPT_INSTRUCTIONS": (
        "你是一位法遵/合規覆核專家。請依『檢核規範』逐點比對『文件全文』,並嚴格以 JSON 回覆:"
        '{"符合情況":"符合|不符合|部分符合","原因":[],"改進建議":[],"規則逐點檢核":[{"規則編號":"","規則內容":"",'
        '"判斷":"符合|不符合|部分符合","理由":"","建議":""}]}'
    ),
    "SYSTEM_MESSAGE": "你是嚴謹的法遵覆核專家,請以審計可追溯為原則回覆並僅輸出 JSON。",
    "temperature": 0.2,
    # Interpreted as a CHARACTER budget downstream (see run_check_with_log),
    # despite the "tokens" name.
    "MAX_TOKENS_INPUT": 100000
}
try:
    with open(CONFIG_PATH, "r", encoding="utf-8") as f:
        cfg = json.load(f) or {}
    print(f"[CONFIG] Loaded: {CONFIG_PATH}")
except FileNotFoundError:
    print(f"[CONFIG][WARN] {CONFIG_PATH} not found, using defaults.")
    cfg = dict(_default_cfg)
except Exception as e:
    print(f"[CONFIG][ERROR] Failed to load config.json: {e}. Using defaults.")
    cfg = dict(_default_cfg)
# Derived settings: each falls back to the default when missing/empty.
DEFAULT_RULES = (cfg.get("DEFAULT_RULES") or _default_cfg["DEFAULT_RULES"]).strip()
DEMO_PROMPT_INSTRUCTIONS = (cfg.get("DEMO_PROMPT_INSTRUCTIONS") or _default_cfg["DEMO_PROMPT_INSTRUCTIONS"]).strip()
SYSTEM_MESSAGE = (cfg.get("SYSTEM_MESSAGE") or _default_cfg["SYSTEM_MESSAGE"]).strip()
TEMPERATURE = float(cfg.get("temperature", _default_cfg["temperature"]))
MAX_TOKENS_INPUT = int(cfg.get("MAX_TOKENS_INPUT", _default_cfg["MAX_TOKENS_INPUT"]))
| # ----------------------------- | |
| # PaddleOCR 初始化(lang='ch' 同時支援簡/繁) | |
| # ----------------------------- | |
| import torch | |
| from paddleocr import PaddleOCR | |
| OCR = None | |
def _init_ocr() -> Optional[PaddleOCR]:
    """Create a PaddleOCR engine (lang='ch' covers Simplified and Traditional).

    GPU use is auto-detected via torch. Returns None on failure so callers
    can degrade gracefully to "no OCR available".
    """
    try:
        print("[OCR] Initializing PaddleOCR (lang='ch', PP-OCRv4)")
        engine = PaddleOCR(
            lang="ch",
            use_angle_cls=True,
            use_gpu=torch.cuda.is_available(),
            ocr_version="PP-OCRv4",
            show_log=False,
        )
        print("[OCR] Ready.")
        return engine
    except Exception as exc:
        print(f"[OCR][ERROR] init failed: {exc}")
        return None
| # ----------------------------- | |
| # 本地 LLaMA(Transformers) | |
| # ----------------------------- | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| LOCAL_MODEL_ID = os.getenv("LOCAL_MODEL_ID", "meta-llama/Meta-Llama-3.1-8B-Instruct") | |
| HF_TOKEN = os.getenv("HF_TOKEN", None) | |
| _hf_tok = None | |
| _hf_model = None | |
def _ensure_local_model(logs: Optional[List[str]] = None) -> None:
    """Lazily load the local tokenizer/model pair (module-level singletons).

    First call downloads into TRANSFORMERS_CACHE; later calls are no-ops.
    On failure the error is recorded in ``logs`` (when given) and re-raised.
    """
    global _hf_tok, _hf_model
    try:
        if _hf_tok is not None and _hf_model is not None:
            return  # already loaded — nothing to do
        if logs is not None:
            logs.append(f"[LOCAL LLM] Loading model: {LOCAL_MODEL_ID}")
        print(f"[LLM] Loading {LOCAL_MODEL_ID} (cache={os.environ.get('TRANSFORMERS_CACHE')})")
        cache_dir = os.environ["TRANSFORMERS_CACHE"]
        _hf_tok = AutoTokenizer.from_pretrained(
            LOCAL_MODEL_ID,
            use_fast=True,
            cache_dir=cache_dir,
            token=HF_TOKEN,
        )
        # bfloat16 on GPU, float32 on CPU; device_map="auto" places layers.
        dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
        _hf_model = AutoModelForCausalLM.from_pretrained(
            LOCAL_MODEL_ID,
            torch_dtype=dtype,
            device_map="auto",
            cache_dir=cache_dir,
            token=HF_TOKEN,
        )
        if logs is not None:
            logs.append("[LOCAL LLM] Model ready.")
        print("[LLM] Ready.")
    except Exception as e:
        if logs is not None:
            logs.append(f"[LOCAL LLM][ERROR] load failed: {e}")
        print(f"[LLM][ERROR] load failed: {e}")
        raise
| # ---------- Robust JSON parsing helpers ---------- | |
| def _strip_code_fences(s: str) -> str: | |
| s = s.strip() | |
| if s.startswith("```"): | |
| s = s[3:] | |
| if "```" in s: | |
| s = s.split("```", 1)[0] | |
| s = s.replace("```json", "").replace("```JSON", "").strip("` \n\r\t") | |
| return s | |
| def _extract_first_brace_block(s: str) -> str: | |
| start = s.find("{") | |
| if start == -1: | |
| return s | |
| depth = 0 | |
| for i in range(start, len(s)): | |
| if s[i] == "{": | |
| depth += 1 | |
| elif s[i] == "}": | |
| depth -= 1 | |
| if depth == 0: | |
| return s[start:i+1] | |
| return s | |
def safe_parse_json(text: str) -> dict:
    """Parse *text* as JSON with increasingly forgiving fallbacks.

    Attempt order:
      1. strict ``json.loads`` on the raw text
      2. strip code fences, take the first balanced ``{...}`` block, retry
      3. ``json5`` (single quotes / trailing commas) when installed
      4. normalize curly quotes and BOM; as a last resort convert an
         all-single-quote payload to double quotes

    Raises the final ``json.loads`` error when every attempt fails.
    """
    # Fast path: the text is already valid JSON.
    try:
        return json.loads(text)
    except Exception:
        pass
    candidate = _extract_first_brace_block(_strip_code_fences(text))
    try:
        return json.loads(candidate)
    except Exception:
        pass
    # Optional third-party parser; silently skipped when not installed.
    try:
        import json5  # type: ignore
        return json5.loads(candidate)
    except Exception:
        pass
    # Repair fancy quotes and a stray BOM, then try one last time.
    cleaned = (
        candidate.replace("\u201c", '"').replace("\u201d", '"')
        .replace("\u2018", "'").replace("\u2019", "'")
        .replace("\ufeff", "")
        .strip()
    )
    if "'" in cleaned and '"' not in cleaned:
        cleaned = cleaned.replace("'", '"')
    return json.loads(cleaned)
def extract_model_reply(full_text, prompt):
    """Strip an echoed prompt / role preamble from a model's raw output.

    Finds the LAST occurrence of any known role marker (or the prompt
    itself) and returns everything after it; falls back to the stripped
    full text on error.

    Fix: the old code always skipped ``len(markers[-1])`` characters (the
    prompt's length) even when a different marker matched last, which
    chopped or duplicated part of the reply.
    """
    try:
        markers = ["user", "User", "使用者", prompt.strip()]
        last_pos = -1
        last_len = 0
        for m in markers:
            pos = full_text.rfind(m)
            if pos > last_pos:
                # Remember the matched marker's own length so we skip
                # exactly that marker, not the final one in the list.
                last_pos = pos
                last_len = len(m)
        reply = full_text[last_pos + last_len:] if last_pos != -1 else full_text
        return reply.strip()
    except Exception as e:
        print(f"[extract_model_reply 錯誤] {e}")
        return full_text.strip()
| # === 放在 safe_parse_json 之後:用「正則」擷取 full_text 中最後一個完整 JSON 物件 === | |
| try: | |
| import regex as re2 # 第三方 regex,支援遞迴 (?R) | |
| except Exception: | |
| re2 = None | |
def extract_last_json_block(text: str) -> Optional[str]:
    """Return the LAST balanced ``{...}`` JSON object found in *text*.

    Prefers the third-party ``regex`` module's recursive pattern ``(?R)``
    to match balanced braces; falls back to a manual depth-counting scan
    when ``regex`` is unavailable. Returns None when no complete object
    exists or an unexpected error occurs.
    """
    try:
        cleaned = _strip_code_fences(text)
        if re2 is not None:
            # Recursive pattern: a brace block of non-braces or nested blocks.
            balanced = re2.compile(r"\{(?:[^{}]|(?R))*\}", flags=re2.DOTALL)
            found = [m.group(0) for m in balanced.finditer(cleaned)]
            return found[-1] if found else None
        # Fallback: record every top-level balanced block while scanning.
        found = []
        depth, begin = 0, None
        for idx, ch in enumerate(cleaned):
            if ch == "{":
                if depth == 0:
                    begin = idx
                depth += 1
            elif ch == "}" and depth > 0:
                depth -= 1
                if depth == 0 and begin is not None:
                    found.append(cleaned[begin:idx + 1])
                    begin = None
        return found[-1] if found else None
    except Exception as e:
        print(f"[JSON-EXTRACT][ERROR] {e}")
        return None
def call_llm(messages: List[dict], model: str, logs: List[str]) -> dict:
    """Run the local LLaMA on chat *messages* and return the parsed JSON verdict.

    Keeps the historical name ``call_llm`` but runs locally via Transformers.
    The model is told to emit exactly one JSON object; the last balanced
    ``{...}`` block is extracted from the generation and parsed robustly.
    On any failure a structured fallback dict (符合情況=部分符合) is returned
    so the UI always receives a well-formed result. ``model`` is accepted
    for API compatibility but unused. Elapsed time is always appended to
    ``logs``.

    Fixes: removed the unreachable trailing ``return data`` (both inner
    branches already return; the name could even be unbound there), and the
    outer exception handler no longer mislabels model errors as JSON
    parse failures.
    """
    start_time = time.time()
    try:
        _ensure_local_model(logs)
        # Build the chat prompt and append a strict JSON-only constraint.
        sys_txt = messages[0].get("content", "") if messages else ""
        usr_txt = messages[1].get("content", "") if len(messages) > 1 else ""
        extra_rules = "\n\n請務必只輸出單一 JSON 物件,不得包含任何 JSON 之外的文字或符號。"
        print('準備 chat prompt(加上 JSON 輸出約束)')
        chat = [
            {"role": "system", "content": sys_txt},
            {"role": "user", "content": usr_txt + extra_rules}
        ]
        print(f"user content:{usr_txt + extra_rules}")
        prompt = _hf_tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
        inputs = _hf_tok(prompt, return_tensors="pt").to(_hf_model.device)
        print("inputs")
        with torch.no_grad():
            out_ids = _hf_model.generate(
                **inputs,
                max_new_tokens=1024,
                temperature=float(TEMPERATURE),
                do_sample=(float(TEMPERATURE) > 0),  # greedy when temperature == 0
                eos_token_id=_hf_tok.eos_token_id,
                pad_token_id=_hf_tok.eos_token_id
            )
        print("torch.no_grad")
        full_text = _hf_tok.decode(out_ids[0], skip_special_tokens=True)
        # Pull the LAST complete JSON object from the generation; fall back
        # to the raw text (safe_parse_json will still try its best).
        candidate = extract_last_json_block(full_text)
        gen_text = candidate if candidate is not None else full_text
        logs.append(f"[LOCAL LLM] raw_len={len(gen_text)}")
        logs.append(f"[LOCAL LLM] gen_text={gen_text}")
        logs.append(f"[LOCAL LLM] prompt={prompt}")
        logs.append(f"[LOCAL LLM] full_text={full_text}")
        # Robust parse with a structured fallback on failure.
        try:
            data = safe_parse_json(gen_text)
            logs.append("[LOCAL LLM] JSON 解析成功")
            return data
        except Exception as jerr:
            logs.append(f"[LOCAL LLM] JSON 解析失敗:{jerr}")
            return {
                "符合情況": "部分符合",
                "原因": [f"模型輸出非合法 JSON:{str(jerr)}"],
                "改進建議": ["請調整提示詞,要求嚴格輸出 JSON(雙引號、無註解、無多餘文字)。"],
                "規則逐點檢核": []
            }
    except Exception as e:
        # Model-loading / generation failure (not a JSON-parsing problem).
        logs.append(f"[LOCAL LLM][ERROR] 本地模型錯誤:{e}")
        return {
            "符合情況": "部分符合",
            "原因": [f"本地模型錯誤:{e}"],
            "改進建議": ["請檢查 Hugging Face 權杖與模型權限、或改用較小模型。"],
            "規則逐點檢核": []
        }
    finally:
        elapsed = time.time() - start_time
        logs.append(f"[LOCAL LLM] 本次執行耗時:{elapsed:.2f} 秒")
| # ----------------------------- | |
| # 檔案讀取:TXT / PDF(含 OCR fallback) | |
| # ----------------------------- | |
| def _read_text_file(path: str) -> str: | |
| """讀取 TXT:嘗試多種常見編碼(台灣環境可能遇到 cp950/big5)。""" | |
| try: | |
| print(f"[READ] TXT: {os.path.basename(path)}") | |
| encodings = ("utf-8", "utf-8-sig", "cp950", "big5", "latin1") | |
| for enc in encodings: | |
| try: | |
| with open(path, "r", encoding=enc, errors="ignore") as f: | |
| return f.read() | |
| except Exception: | |
| continue | |
| with open(path, "rb") as f: | |
| return f.read().decode("utf-8", errors="ignore") | |
| except Exception as e: | |
| print(f"[READ][ERROR] TXT {path}: {e}") | |
| return "" | |
def _ensure_ocr_ready():
    """Lazily create the module-level OCR engine on first use (saves resources)."""
    global OCR
    OCR = OCR if OCR is not None else _init_ocr()
def _ocr_page_text(page) -> str:
    """OCR a PDF page: rasterize at 240 dpi, run PaddleOCR, join line texts.

    Returns "" when the OCR engine is unavailable, the page yields no
    recognitions, or any step fails.
    """
    try:
        _ensure_ocr_ready()
        if OCR is None:
            return ""  # engine failed to initialize — treat page as empty
        pixmap = page.get_pixmap(dpi=240)
        image = Image.open(io.BytesIO(pixmap.tobytes("png"))).convert("RGB")
        result = OCR.ocr(np.array(image), cls=True)
        if not result or not result[0]:
            return ""
        recognized = [entry[1][0] for entry in result[0]]
        return "\n".join(recognized).strip()
    except Exception as e:
        print(f"[OCR][ERROR] {e}")
        return ""
def _read_pdf_text(path: str) -> Tuple[str, int]:
    """Extract text from a PDF, OCR-ing pages that look like scanned images.

    A page with fewer than 20 extracted characters is assumed to be an
    image-only page and is routed through OCR instead.
    Returns (full_text, page_count); ("", 0) on failure.
    """
    try:
        print(f"[READ] PDF: {os.path.basename(path)}")
        page_texts: List[str] = []
        with fitz.open(path) as doc:
            for page in doc:
                extracted = (page.get_text("text") or "").strip()
                # Below the threshold → assume a scanned page, fall back to OCR.
                page_texts.append(
                    extracted if len(extracted) >= 20 else _ocr_page_text(page)
                )
            page_count = len(doc)
        return "\n".join(page_texts).strip(), page_count
    except Exception as e:
        print(f"[READ][ERROR] PDF {path}: {e}")
        return "", 0
| def _read_file_to_text(file_path: Optional[str]) -> Tuple[str, str]: | |
| """統一入口:根據副檔名分派至 TXT/PDF 讀取函式。""" | |
| try: | |
| if not file_path: | |
| raise ValueError("未收到檔案路徑") | |
| file_path = os.fspath(file_path) | |
| if not os.path.exists(file_path): | |
| raise FileNotFoundError(f"檔案不存在:{file_path}") | |
| fname = os.path.basename(file_path) | |
| ext = os.path.splitext(fname)[1].lower() | |
| if ext == ".txt": | |
| content = _read_text_file(file_path) | |
| return content, f"讀取 TXT:{fname}(長度:{len(content)})" | |
| elif ext == ".pdf": | |
| content, pages = _read_pdf_text(file_path) | |
| return content, f"讀取 PDF:{fname}(頁數:{pages};長度:{len(content)})" | |
| else: | |
| raise ValueError("僅支援 .txt 或 .pdf") | |
| except Exception as e: | |
| print(f"[READ][ERROR] {e}") | |
| return "", f"[ERROR] {e}" | |
| # ----------------------------- | |
| # 多檔輔助:清單摘要 / 快取與預覽 | |
| # ----------------------------- | |
| def _read_multi(paths: Optional[List[str]]) -> Dict[str, str]: | |
| """批次讀取多檔,回傳 {檔名: 文字內容},忽略讀取錯誤。""" | |
| cache: Dict[str, str] = {} | |
| try: | |
| if not paths: | |
| return cache | |
| for p in paths: | |
| if not p: | |
| continue | |
| try: | |
| name = os.path.basename(os.fspath(p)) | |
| text, _ = _read_file_to_text(p) | |
| cache[name] = text | |
| except Exception as e: | |
| print(f"[READ][WARN] skip {p}: {e}") | |
| continue | |
| return cache | |
| except Exception as e: | |
| print(f"[READ_MULTI][ERROR] {e}") | |
| return cache | |
| def _summarize_paths(paths: Optional[List[str]]) -> str: | |
| """列出上傳檔案清單(含大小),供 UI 顯示。""" | |
| try: | |
| if not paths: | |
| return "(尚未上傳任何檔案)" | |
| lines = [] | |
| for p in paths: | |
| try: | |
| name = os.path.basename(os.fspath(p)) | |
| size = os.path.getsize(os.fspath(p)) | |
| lines.append(f"- {name}({size} bytes)") | |
| except Exception: | |
| lines.append(f"- {os.path.basename(str(p))}") | |
| return "\n".join(lines) | |
| except Exception as e: | |
| print(f"[SUMMARY][ERROR] {e}") | |
| return "(清單生成失敗)" | |
def on_files_change(paths: Optional[List[str]]):
    """Gradio callback for upload changes.

    Refreshes: the file listing markdown, the preview checkbox choices
    (cleared), the preview textbox (emptied and hidden), and the
    {filename: text} cache stored in gr.State.
    """
    try:
        cache = _read_multi(paths)
        return (
            _summarize_paths(paths),
            gr.update(choices=list(cache.keys()), value=[]),
            gr.update(value="", visible=False),
            cache,
        )
    except Exception as e:
        print(f"[FILES_CHANGE][ERROR] {e}")
        return "(更新失敗)", gr.update(), gr.update(), {}
def on_show_toggle(selected_names: List[str], cache: Dict[str, str]):
    """Gradio callback: show merged content of the checked filenames.

    Hides the preview when nothing is selected; long files are truncated
    to a limit derived from MAX_TOKENS_INPUT (at least 2000 chars).
    """
    try:
        if not selected_names:
            return gr.update(value="", visible=False)
        limit = max(2000, MAX_TOKENS_INPUT // 5)
        sections = []
        for name in selected_names:
            body = cache.get(name, "")
            if len(body) > limit:
                body = body[:limit] + "\n...[內容過長,已截斷顯示]"
            sections.append(f"===== {name} =====\n{body}")
        return gr.update(value="\n\n".join(sections), visible=True)
    except Exception as e:
        print(f"[SHOW_TOGGLE][ERROR] {e}")
        return gr.update(value=f"[ERROR] {e}", visible=True)
| # ----------------------------- | |
| # 檢核前驗證 / Prompt 構建 | |
| # ----------------------------- | |
def validate_before_run(
    policy_files: List[str],
    self_rules: str,
    self_text: str,
    self_files: List[str],
) -> Tuple[bool, str]:
    """Pre-run validation of the UI inputs.

    Requires: at least one policy file, non-blank rules, and a
    self-assessment supplied either as text or as >= 1 attachment.
    Returns (ok, newline-joined error messages — empty when ok).
    """
    try:
        errors = []
        if not policy_files:
            errors.append("請上傳『單位適用內規(可多檔)』至少一個檔案。")
        if not (self_rules and self_rules.strip()):
            errors.append("請輸入『自評檢核規則』。")
        has_text = bool(self_text and self_text.strip())
        has_files = bool(self_files)
        if not has_text and not has_files:
            errors.append("請提供『單位自評』文字,或上傳『單位自評附檔(可多檔)』至少一個。")
        return not errors, "\n".join(errors)
    except Exception as e:
        print(f"[VALIDATE][ERROR] {e}")
        return False, f"驗證失敗:{e}"
def build_prompt(rules: str, doc_text: str) -> List[dict]:
    """Build the chat messages from the check rules and the document text.

    On failure returns a placeholder user message so the pipeline can
    still proceed and surface the problem downstream.
    """
    try:
        user_content = (
            f"{DEMO_PROMPT_INSTRUCTIONS}\n\n[檢核規範]\n{rules}\n\n[文件全文]\n{doc_text}"
        )
        return [
            {"role": "system", "content": SYSTEM_MESSAGE},
            {"role": "user", "content": user_content},
        ]
    except Exception as e:
        print(f"[PROMPT][ERROR] {e}")
        return [{"role": "system", "content": SYSTEM_MESSAGE},
                {"role": "user", "content": "(構建 Prompt 失敗)"}]
| # ----------------------------- | |
| # 主流程:執行檢核 | |
| # ----------------------------- | |
def run_check_with_log(
    policy_files_paths,  # policy files, multiple allowed (required)
    self_rules_text,     # self-assessment check rules (required; defaults to DEFAULT_RULES)
    self_text,           # self-assessment free text (may be empty)
    self_files_paths     # self-assessment attachments, multiple allowed (may be empty)
):
    """Main pipeline: validate inputs, gather the self-assessment text, call
    the local LLM, and return (info_markdown, pretty_json, table_rows, log).

    On validation failure or empty input, returns the message with None for
    the JSON/table outputs; never raises (errors become the info string).
    """
    logs: List[str] = []
    try:
        # Up-front validation; bail out with the joined error messages.
        ok, msg = validate_before_run(policy_files_paths, self_rules_text, self_text, self_files_paths)
        if not ok:
            msg_display = msg.replace("\n", ";")
            logs.append(f"[VALIDATE] {msg_display}")
            return msg, None, None, "\n".join(logs)
        # Read the policy files — currently only logged as an audit trail;
        # their text is not fed to the LLM.
        for p in (policy_files_paths or []):
            t, info = _read_file_to_text(p)
            logs.append(f"[POLICY] {info}")
        # Collect the self-assessment: the textbox wins; otherwise merge
        # all non-empty attachment texts.
        doc_text = ""
        if self_text and self_text.strip():
            doc_text = self_text.strip()
            logs.append(f"[SELF] from textbox, len={len(doc_text)}")
        else:
            pieces = []
            for p in (self_files_paths or []):
                t, info = _read_file_to_text(p)
                logs.append(f"[SELF_FILE] {info}")
                if t.strip():
                    pieces.append(t.strip())
            doc_text = "\n\n".join(pieces).strip()
        if not doc_text:
            return "未取得任何自評內容(文字與附檔皆為空)。", None, None, "\n".join(logs)
        # Conservative length cap — character count used as a token proxy.
        if len(doc_text) > MAX_TOKENS_INPUT:
            logs.append(f"[WARN] self text too long ({len(doc_text)}) → truncate to {MAX_TOKENS_INPUT}")
            doc_text = doc_text[:MAX_TOKENS_INPUT]
        # Build the chat messages and run the local LLM.
        msgs = build_prompt(self_rules_text.strip(), doc_text)
        logs.append("=== Prompt ===")
        logs.append(f"[system] len={len(msgs[0]['content']) if msgs else 0}")
        logs.append(f"[user] len={len(msgs[1]['content']) if len(msgs)>1 else 0}")
        result_dict = call_llm(msgs, model="local", logs=logs)
        logs.append("[LLM] done.")
        pretty = json.dumps(result_dict, ensure_ascii=False, indent=2)
        # Flatten the per-rule findings into rows for the Dataframe output.
        table = [
            [i.get("規則編號",""), i.get("規則內容",""), i.get("判斷",""), i.get("理由",""), i.get("建議","")]
            for i in result_dict.get("規則逐點檢核", [])
        ]
        info = f"自評內容長度:{len(doc_text)};自評檢核規則長度:{len(self_rules_text.strip())}"
        return info, pretty, table, "\n".join(logs)
    except Exception as e:
        logs.append(f"[RUN][ERROR] {e}")
        return f"執行失敗:{e}", None, None, "\n".join(logs)
def toggle_debug(current_visibility: bool):
    """Flip the Debug Log visibility; returns (component update, new state)."""
    try:
        flipped = not current_visibility
        return gr.update(visible=flipped), flipped
    except Exception as e:
        print(f"[DEBUG_TOGGLE][ERROR] {e}")
        return gr.update(), current_visibility
| # ----------------------------- | |
| # Gradio 介面 | |
| # ----------------------------- | |
with gr.Blocks(title="法遵自評覆核(PaddleOCR + LLaMA 本地)") as demo:
    gr.Markdown("# 法遵自評覆核(支援 PDF OCR:繁/簡中文;本地 LLaMA 推論)")
    gr.Markdown("步驟:上傳『單位適用內規(可多檔)』 → 輸入『自評檢核規則』(預設載入 config) → 輸入『單位自評』或上傳『自評附檔(可多檔)』 → 開始檢核。")
    # Policy files (multi-upload) + listing + per-file preview toggles
    # (preview hidden by default).
    with gr.Row():
        policy_files_in = gr.Files(
            label="上傳單位適用內規(可多檔,.txt 或 .pdf)",
            file_types=[".txt", ".pdf"],
            type="filepath"
        )
    policy_list_md = gr.Markdown("(尚未上傳任何檔案)")
    policy_show_chk = gr.CheckboxGroup(label="顯示哪些內規內容(個別開關,預設不勾選)", choices=[])
    policy_show_area = gr.Textbox(label="內規檔案內容預覽(僅顯示勾選者,可能截斷)", value="", lines=15, visible=False)
    policy_cache_state = gr.State({})  # {filename: text} cache for previews
    # Self-assessment rules (loaded from config; editable; required).
    with gr.Row():
        self_rules_in = gr.Textbox(
            label="自評檢核規則(必填;預設載入自 config)",
            value=DEFAULT_RULES,
            lines=8,
            placeholder="請貼上要檢核自評內容的規則;例如:需包含抽檢紀錄、會辦流程、簽核證明等。"
        )
    # Self-assessment free text (optional — attachments may be used instead).
    with gr.Row():
        self_text_in = gr.Textbox(
            label="單位自評(文字輸入;可留白改上傳自評附檔)",
            value="",
            lines=10,
            placeholder="請輸入自評文字;或留白並在下方上傳自評附檔(可多檔)"
        )
    # Self-assessment attachments (multi-upload) + listing + preview toggles.
    with gr.Row():
        self_files_in = gr.Files(
            label="上傳單位自評附檔(可多檔,.txt 或 .pdf)",
            file_types=[".txt", ".pdf"],
            type="filepath"
        )
    self_list_md = gr.Markdown("(尚未上傳任何檔案)")
    self_show_chk = gr.CheckboxGroup(label="顯示哪些自評附檔內容(個別開關,預設不勾選)", choices=[])
    self_show_area = gr.Textbox(label="自評附檔內容預覽(僅顯示勾選者,可能截斷)", value="", lines=15, visible=False)
    self_cache_state = gr.State({})  # {filename: text} cache for previews
    # Control row: run (disabled until inputs validate) + debug toggle.
    with gr.Row():
        run_btn = gr.Button("開始檢核", variant="primary", interactive=False)
        toggle_debug_btn = gr.Button("切換 Debug Log 顯示/隱藏")
    info_out = gr.Markdown(label="讀檔/輸入資訊")
    json_out = gr.Code(label="檢核結果(JSON)", language="json")
    table_out = gr.Dataframe(headers=["規則編號","規則內容","判斷","理由","建議"], wrap=True)
    debug_log = gr.Textbox(label="Debug Log", lines=14, visible=False)
    debug_state = gr.State(False)  # tracks Debug Log visibility
    # Policy files: wire upload → listing/choices/preview/cache.
    policy_files_in.change(
        fn=on_files_change,
        inputs=[policy_files_in],
        outputs=[policy_list_md, policy_show_chk, policy_show_area, policy_cache_state]
    )
    policy_show_chk.change(
        fn=on_show_toggle,
        inputs=[policy_show_chk, policy_cache_state],
        outputs=[policy_show_area]
    )
    # Self-assessment attachments: same wiring as policy files.
    self_files_in.change(
        fn=on_files_change,
        inputs=[self_files_in],
        outputs=[self_list_md, self_show_chk, self_show_area, self_cache_state]
    )
    self_show_chk.change(
        fn=on_show_toggle,
        inputs=[self_show_chk, self_cache_state],
        outputs=[self_show_area]
    )
    # Run button enabled only when: policy files (>=1) + rules (non-blank)
    # + self-assessment (text or files).
    def _ready(policy_files, self_rules, self_text, self_files):
        """Recompute the run button's interactivity from the current inputs."""
        try:
            ok, _ = validate_before_run(policy_files, self_rules, self_text, self_files)
            return gr.update(interactive=ok)
        except Exception as e:
            print(f"[READY][ERROR] {e}")
            return gr.update(interactive=False)
    policy_files_in.change(_ready, [policy_files_in, self_rules_in, self_text_in, self_files_in], [run_btn])
    self_rules_in.change(_ready, [policy_files_in, self_rules_in, self_text_in, self_files_in], [run_btn])
    self_text_in.change(_ready, [policy_files_in, self_rules_in, self_text_in, self_files_in], [run_btn])
    self_files_in.change(_ready, [policy_files_in, self_rules_in, self_text_in, self_files_in], [run_btn])
    # Run the full check pipeline.
    run_btn.click(
        fn=run_check_with_log,
        inputs=[policy_files_in, self_rules_in, self_text_in, self_files_in],
        outputs=[info_out, json_out, table_out, debug_log]
    )
    toggle_debug_btn.click(fn=toggle_debug, inputs=[debug_state], outputs=[debug_log, debug_state])
if __name__ == "__main__":
    # Spaces requires binding 0.0.0.0; the same settings also work locally.
    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")), share=False)