# NOTE: the three lines below are residue from the file-hosting upload page,
# not Python source; kept as comments so the file parses.
# howard9963's picture
# Upload app.py
# dde2f3c verified
# app.py
# Compliance self-assessment review (Hugging Face Spaces edition)
# - PDF reading: extract text with PyMuPDF first; pages with no text layer fall
#   back to PaddleOCR (lang="ch", which also covers Traditional Chinese)
# - LLM: local Hugging Face Transformers (default meta-llama/Meta-Llama-3.1-8B-Instruct),
#   auto-downloaded to /data/hf on first use
# - Self-assessment check rules: loaded from DEFAULT_RULES in config/config.json
#   (editable in the UI)
# - Every function carries try/except and comments; key steps print logs for debugging
# - UI: multi-file policy upload (with preview), rules (required), self-assessment
#   text OR attachments (one of the two), Debug Log visibility toggle
import os
import io
import re
import json
import time
from typing import List, Dict, Tuple, Optional
import gradio as gr
import fitz  # PyMuPDF
import numpy as np
from PIL import Image
# HF cache location (persists model downloads across restarts on Spaces)
os.environ.setdefault("TRANSFORMERS_CACHE", "/data/hf")
os.environ.setdefault("HF_HOME", "/data/hf")
# -----------------------------
# Config loading (with defaults and error handling)
# -----------------------------
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# NOTE(review): the directory here is "Config" (capital C) while the header
# comment says config/config.json — on a case-sensitive filesystem (Linux /
# HF Spaces) one of the two is wrong; confirm the actual folder name.
CONFIG_PATH = os.path.join(BASE_DIR, "Config", "config.json")
# Fallback defaults used when config.json is missing or unreadable.
_default_cfg = {
    "DEFAULT_RULES": "【請在此貼上你的檢核規則】",
    "DEMO_PROMPT_INSTRUCTIONS": (
        "你是一位法遵/合規覆核專家。請依『檢核規範』逐點比對『文件全文』,並嚴格以 JSON 回覆:"
        '{"符合情況":"符合|不符合|部分符合","原因":[],"改進建議":[],"規則逐點檢核":[{"規則編號":"","規則內容":"",'
        '"判斷":"符合|不符合|部分符合","理由":"","建議":""}]}'
    ),
    "SYSTEM_MESSAGE": "你是嚴謹的法遵覆核專家,請以審計可追溯為原則回覆並僅輸出 JSON。",
    "temperature": 0.2,
    "MAX_TOKENS_INPUT": 100000
}
try:
    with open(CONFIG_PATH, "r", encoding="utf-8") as f:
        cfg = json.load(f) or {}
    print(f"[CONFIG] Loaded: {CONFIG_PATH}")
except FileNotFoundError:
    print(f"[CONFIG][WARN] {CONFIG_PATH} not found, using defaults.")
    cfg = dict(_default_cfg)
except Exception as e:
    print(f"[CONFIG][ERROR] Failed to load config.json: {e}. Using defaults.")
    cfg = dict(_default_cfg)
# Effective settings: config value when present/truthy, else the default.
DEFAULT_RULES = (cfg.get("DEFAULT_RULES") or _default_cfg["DEFAULT_RULES"]).strip()
DEMO_PROMPT_INSTRUCTIONS = (cfg.get("DEMO_PROMPT_INSTRUCTIONS") or _default_cfg["DEMO_PROMPT_INSTRUCTIONS"]).strip()
SYSTEM_MESSAGE = (cfg.get("SYSTEM_MESSAGE") or _default_cfg["SYSTEM_MESSAGE"]).strip()
TEMPERATURE = float(cfg.get("temperature", _default_cfg["temperature"]))
MAX_TOKENS_INPUT = int(cfg.get("MAX_TOKENS_INPUT", _default_cfg["MAX_TOKENS_INPUT"]))
# -----------------------------
# PaddleOCR initialization (lang='ch' covers Simplified/Traditional Chinese)
# -----------------------------
import torch
from paddleocr import PaddleOCR
OCR = None  # lazily-created singleton; see _ensure_ocr_ready()
def _init_ocr() -> Optional[PaddleOCR]:
    """Initialize PaddleOCR (lang='ch'; uses GPU when torch sees one).

    Returns:
        The OCR engine, or None when initialization fails.
    """
    try:
        print("[OCR] Initializing PaddleOCR (lang='ch', PP-OCRv4)")
        # NOTE(review): use_gpu/show_log were removed in newer paddleocr
        # releases — pin the paddleocr version or verify these kwargs.
        ocr = PaddleOCR(
            lang="ch",
            use_angle_cls=True,
            use_gpu=torch.cuda.is_available(),
            ocr_version="PP-OCRv4",
            show_log=False
        )
        print("[OCR] Ready.")
        return ocr
    except Exception as e:
        print(f"[OCR][ERROR] init failed: {e}")
        return None
# -----------------------------
# Local LLaMA (Transformers)
# -----------------------------
from transformers import AutoModelForCausalLM, AutoTokenizer
LOCAL_MODEL_ID = os.getenv("LOCAL_MODEL_ID", "meta-llama/Meta-Llama-3.1-8B-Instruct")
HF_TOKEN = os.getenv("HF_TOKEN", None)
# Lazily-loaded tokenizer/model singletons (populated by _ensure_local_model).
_hf_tok = None
_hf_model = None
def _ensure_local_model(logs: Optional[List[str]] = None) -> None:
    """Load the local model once (downloads into /data/hf on first call).

    Args:
        logs: optional debug-log accumulator, appended in place.

    Raises:
        Re-raises any loading error after logging it.
    """
    global _hf_tok, _hf_model
    try:
        if _hf_tok is not None and _hf_model is not None:
            return  # already loaded
        if logs is not None:
            logs.append(f"[LOCAL LLM] Loading model: {LOCAL_MODEL_ID}")
        print(f"[LLM] Loading {LOCAL_MODEL_ID} (cache={os.environ.get('TRANSFORMERS_CACHE')})")
        _hf_tok = AutoTokenizer.from_pretrained(
            LOCAL_MODEL_ID, use_fast=True, cache_dir=os.environ["TRANSFORMERS_CACHE"], token=HF_TOKEN
        )
        _hf_model = AutoModelForCausalLM.from_pretrained(
            LOCAL_MODEL_ID,
            # bf16 on GPU, fp32 on CPU (CPU bf16 support varies by hardware).
            torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
            device_map="auto",
            cache_dir=os.environ["TRANSFORMERS_CACHE"],
            token=HF_TOKEN
        )
        if logs is not None:
            logs.append("[LOCAL LLM] Model ready.")
        print("[LLM] Ready.")
    except Exception as e:
        if logs is not None:
            logs.append(f"[LOCAL LLM][ERROR] load failed: {e}")
        print(f"[LLM][ERROR] load failed: {e}")
        raise
# ---------- Robust JSON parsing helpers ----------
def _strip_code_fences(s: str) -> str:
s = s.strip()
if s.startswith("```"):
s = s[3:]
if "```" in s:
s = s.split("```", 1)[0]
s = s.replace("```json", "").replace("```JSON", "").strip("` \n\r\t")
return s
def _extract_first_brace_block(s: str) -> str:
start = s.find("{")
if start == -1:
return s
depth = 0
for i in range(start, len(s)):
if s[i] == "{":
depth += 1
elif s[i] == "}":
depth -= 1
if depth == 0:
return s[start:i+1]
return s
def safe_parse_json(text: str) -> dict:
    """Parse *text* as JSON with progressively more forgiving fallbacks.

    Order of attempts:
      1. strict json.loads on the raw text
      2. strip code fences, take the first balanced {...}, parse again
      3. json5 (single quotes / trailing commas), only if installed
      4. repair curly quotes and BOM; if the text uses only single quotes,
         convert them to double quotes, then parse (may raise).
    """
    try:
        return json.loads(text)
    except Exception:
        pass
    candidate = _extract_first_brace_block(_strip_code_fences(text))
    try:
        return json.loads(candidate)
    except Exception:
        pass
    # Optional json5 pass — silently skipped when the package is absent.
    try:
        import json5  # type: ignore
        return json5.loads(candidate)
    except Exception:
        pass
    # Last resort: normalize fancy quotes and BOM before a final strict parse.
    fixed = (
        candidate.replace("\u201c", '"').replace("\u201d", '"')
        .replace("\u2018", "'").replace("\u2019", "'")
        .replace("\ufeff", "").strip()
    )
    if "'" in fixed and '"' not in fixed:
        fixed = fixed.replace("'", '"')
    return json.loads(fixed)
def extract_model_reply(full_text, prompt):
    """Strip echoed prompt/system/user prefixes from the model output.

    Finds the last occurrence of any known marker (role labels or the prompt
    itself) and returns everything after that marker; returns the whole text
    (stripped) when no marker matches or on any error.

    BUG FIX: the old code always sliced using ``len(markers[-1])`` (the
    prompt's length) even when a different, shorter marker produced the
    match position — this version remembers which marker actually matched.
    """
    try:
        markers = ["user", "User", "使用者", prompt.strip()]
        last_pos = -1
        last_marker = ""
        for m in markers:
            if not m:
                continue  # skip empty prompt marker
            pos = full_text.rfind(m)
            if pos > last_pos:
                last_pos = pos
                last_marker = m
        if last_pos != -1:
            reply = full_text[last_pos + len(last_marker):]
        else:
            reply = full_text
        return reply.strip()
    except Exception as e:
        print(f"[extract_model_reply 錯誤] {e}")
        return full_text.strip()
# === 放在 safe_parse_json 之後:用「正則」擷取 full_text 中最後一個完整 JSON 物件 ===
try:
import regex as re2 # 第三方 regex,支援遞迴 (?R)
except Exception:
re2 = None
def extract_last_json_block(text: str) -> Optional[str]:
    """Return the last balanced top-level {...} block in *text*, or None.

    Prefers the third-party `regex` module's recursive (?R) pattern for
    balanced-brace matching; falls back to a manual depth counter when
    `regex` is not installed.
    """
    try:
        cleaned = _strip_code_fences(text)
        if re2 is not None:
            # Recursive pattern: a brace group containing non-braces or
            # nested, balanced brace groups.
            found = re2.findall(r"\{(?:[^{}]|(?R))*\}", cleaned, flags=re2.DOTALL)
            return found[-1] if found else None
        # Fallback: scan once, recording every top-level balanced group.
        found = []
        depth = 0
        open_at = None
        for pos, ch in enumerate(cleaned):
            if ch == "{":
                if depth == 0:
                    open_at = pos
                depth += 1
            elif ch == "}" and depth > 0:
                depth -= 1
                if depth == 0 and open_at is not None:
                    found.append(cleaned[open_at:pos + 1])
                    open_at = None
        return found[-1] if found else None
    except Exception as e:
        print(f"[JSON-EXTRACT][ERROR] {e}")
        return None
def call_llm(messages: List[dict], model: str, logs: List[str]) -> dict:
    """Run the local LLaMA model on a chat prompt and parse the reply as JSON.

    Keeps the historical name ``call_llm``; the ``model`` argument is accepted
    for interface compatibility but ignored (the model configured via
    LOCAL_MODEL_ID is always used).

    Args:
        messages: chat messages; messages[0] is the system turn and
            messages[1] the user turn (missing turns default to "").
        model: unused placeholder kept for backward compatibility.
        logs: debug-log accumulator, appended in place.

    Returns:
        The parsed JSON dict, or a fallback dict describing the failure
        (never raises; elapsed time is always logged via ``finally``).

    BUG FIXES vs. the original:
      - removed an unreachable ``return data`` that followed the inner
        try/except (both branches already return);
      - the outer exception handler logged every failure as
        "JSON 解析失敗" (JSON parse failure) even for model-loading errors;
        it now logs the actual error category.
    """
    start_time = time.time()  # wall-clock timing for the finally-logged line
    try:
        _ensure_local_model(logs)
        # Build the chat prompt with an explicit JSON-only output constraint.
        sys_txt = messages[0].get("content", "") if messages else ""
        usr_txt = messages[1].get("content", "") if len(messages) > 1 else ""
        extra_rules = "\n\n請務必只輸出單一 JSON 物件,不得包含任何 JSON 之外的文字或符號。"
        print('準備 chat prompt(加上 JSON 輸出約束)')
        chat = [
            {"role": "system", "content": sys_txt},
            {"role": "user", "content": usr_txt + extra_rules}
        ]
        print(f"user content:{usr_txt + extra_rules}")
        prompt = _hf_tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
        inputs = _hf_tok(prompt, return_tensors="pt").to(_hf_model.device)
        print("inputs")
        with torch.no_grad():
            out_ids = _hf_model.generate(
                **inputs,
                max_new_tokens=1024,
                temperature=float(TEMPERATURE),
                do_sample=(float(TEMPERATURE) > 0),
                eos_token_id=_hf_tok.eos_token_id,
                pad_token_id=_hf_tok.eos_token_id
            )
        print("torch.no_grad")
        # The decoded text still contains the echoed prompt, so pull out the
        # LAST balanced JSON object; fall back to the full text and let
        # safe_parse_json try its repairs.
        full_text = _hf_tok.decode(out_ids[0], skip_special_tokens=True)
        candidate = extract_last_json_block(full_text)
        gen_text = candidate if candidate is not None else full_text
        logs.append(f"[LOCAL LLM] raw_len={len(gen_text)}")
        logs.append(f"[LOCAL LLM] gen_text={gen_text}")
        logs.append(f"[LOCAL LLM] prompt={prompt}")
        logs.append(f"[LOCAL LLM] full_text={full_text}")
        # Robust parsing; on failure return an auditable fallback verdict.
        try:
            data = safe_parse_json(gen_text)
            logs.append("[LOCAL LLM] JSON 解析成功")
            return data
        except Exception as jerr:
            logs.append(f"[LOCAL LLM] JSON 解析失敗:{jerr}")
            return {
                "符合情況": "部分符合",
                "原因": [f"模型輸出非合法 JSON:{str(jerr)}"],
                "改進建議": ["請調整提示詞,要求嚴格輸出 JSON(雙引號、無註解、無多餘文字)。"],
                "規則逐點檢核": []
            }
    except Exception as e:
        logs.append(f"[LOCAL LLM][ERROR] {e}")
        return {
            "符合情況": "部分符合",
            "原因": [f"本地模型錯誤:{e}"],
            "改進建議": ["請檢查 Hugging Face 權杖與模型權限、或改用較小模型。"],
            "規則逐點檢核": []
        }
    finally:
        elapsed = time.time() - start_time
        logs.append(f"[LOCAL LLM] 本次執行耗時:{elapsed:.2f} 秒")
# -----------------------------
# 檔案讀取:TXT / PDF(含 OCR fallback)
# -----------------------------
def _read_text_file(path: str) -> str:
"""讀取 TXT:嘗試多種常見編碼(台灣環境可能遇到 cp950/big5)。"""
try:
print(f"[READ] TXT: {os.path.basename(path)}")
encodings = ("utf-8", "utf-8-sig", "cp950", "big5", "latin1")
for enc in encodings:
try:
with open(path, "r", encoding=enc, errors="ignore") as f:
return f.read()
except Exception:
continue
with open(path, "rb") as f:
return f.read().decode("utf-8", errors="ignore")
except Exception as e:
print(f"[READ][ERROR] TXT {path}: {e}")
return ""
def _ensure_ocr_ready():
    """Lazily initialize the module-level OCR engine on first use."""
    global OCR
    if OCR is None:
        OCR = _init_ocr()  # stays None when init fails (see _init_ocr)
def _ocr_page_text(page) -> str:
    """Render a PDF page to an image and OCR it; returns "" on any failure.

    Args:
        page: a fitz (PyMuPDF) Page object.
    """
    try:
        _ensure_ocr_ready()
        if OCR is None:
            return ""
        # 240 dpi raster: compromise between OCR accuracy and speed.
        pix = page.get_pixmap(dpi=240)
        img = Image.open(io.BytesIO(pix.tobytes("png"))).convert("RGB")
        res = OCR.ocr(np.array(img), cls=True)
        if not res or not res[0]:
            return ""
        # line[1][0] picks the text out of [box, (text, confidence)] entries.
        # NOTE(review): this matches the classic PaddleOCR result layout —
        # confirm against the installed paddleocr version.
        return "\n".join([line[1][0] for line in res[0]]).strip()
    except Exception as e:
        print(f"[OCR][ERROR] {e}")
        return ""
def _read_pdf_text(path: str) -> Tuple[str, int]:
    """Extract text from a PDF, OCR-ing pages that yield (almost) no text.

    Pages whose extracted text is shorter than 20 characters are treated as
    scanned images and routed through OCR instead.

    Returns:
        (full_text, page_count); ("", 0) on failure.
    """
    try:
        print(f"[READ] PDF: {os.path.basename(path)}")
        parts: List[str] = []
        with fitz.open(path) as doc:
            for page in doc:
                txt = (page.get_text("text") or "").strip()
                if len(txt) < 20:  # heuristic threshold for an image-only page
                    ocr_txt = _ocr_page_text(page)
                    parts.append(ocr_txt)
                else:
                    parts.append(txt)
            return "\n".join(parts).strip(), len(doc)
    except Exception as e:
        print(f"[READ][ERROR] PDF {path}: {e}")
        return "", 0
def _read_file_to_text(file_path: Optional[str]) -> Tuple[str, str]:
"""統一入口:根據副檔名分派至 TXT/PDF 讀取函式。"""
try:
if not file_path:
raise ValueError("未收到檔案路徑")
file_path = os.fspath(file_path)
if not os.path.exists(file_path):
raise FileNotFoundError(f"檔案不存在:{file_path}")
fname = os.path.basename(file_path)
ext = os.path.splitext(fname)[1].lower()
if ext == ".txt":
content = _read_text_file(file_path)
return content, f"讀取 TXT:{fname}(長度:{len(content)})"
elif ext == ".pdf":
content, pages = _read_pdf_text(file_path)
return content, f"讀取 PDF:{fname}(頁數:{pages};長度:{len(content)})"
else:
raise ValueError("僅支援 .txt 或 .pdf")
except Exception as e:
print(f"[READ][ERROR] {e}")
return "", f"[ERROR] {e}"
# -----------------------------
# 多檔輔助:清單摘要 / 快取與預覽
# -----------------------------
def _read_multi(paths: Optional[List[str]]) -> Dict[str, str]:
"""批次讀取多檔,回傳 {檔名: 文字內容},忽略讀取錯誤。"""
cache: Dict[str, str] = {}
try:
if not paths:
return cache
for p in paths:
if not p:
continue
try:
name = os.path.basename(os.fspath(p))
text, _ = _read_file_to_text(p)
cache[name] = text
except Exception as e:
print(f"[READ][WARN] skip {p}: {e}")
continue
return cache
except Exception as e:
print(f"[READ_MULTI][ERROR] {e}")
return cache
def _summarize_paths(paths: Optional[List[str]]) -> str:
"""列出上傳檔案清單(含大小),供 UI 顯示。"""
try:
if not paths:
return "(尚未上傳任何檔案)"
lines = []
for p in paths:
try:
name = os.path.basename(os.fspath(p))
size = os.path.getsize(os.fspath(p))
lines.append(f"- {name}{size} bytes)")
except Exception:
lines.append(f"- {os.path.basename(str(p))}")
return "\n".join(lines)
except Exception as e:
print(f"[SUMMARY][ERROR] {e}")
return "(清單生成失敗)"
def on_files_change(paths: Optional[List[str]]):
    """Gradio callback: uploaded files changed → refresh the summary list,
    the preview checkbox choices, the (hidden) preview area, and the
    per-file text cache."""
    try:
        cache = _read_multi(paths)
        choices = list(cache.keys())
        return (
            _summarize_paths(paths),
            gr.update(choices=choices, value=[]),  # reset selection
            gr.update(value="", visible=False),    # hide preview until re-checked
            cache,
        )
    except Exception as e:
        print(f"[FILES_CHANGE][ERROR] {e}")
        return "(更新失敗)", gr.update(), gr.update(), {}
def on_show_toggle(selected_names: List[str], cache: Dict[str, str]):
    """Gradio callback: show the merged (possibly truncated) content of the
    checked file names, read from the cached texts."""
    try:
        if not selected_names:
            return gr.update(value="", visible=False)
        parts = []
        # Truncation threshold: at least 2000 chars, scaled off MAX_TOKENS_INPUT.
        preview_limit = max(2000, MAX_TOKENS_INPUT // 5)
        for name in selected_names:
            txt = cache.get(name, "")
            if len(txt) > preview_limit:
                txt = txt[:preview_limit] + "\n...[內容過長,已截斷顯示]"
            parts.append(f"===== {name} =====\n{txt}")
        return gr.update(value="\n\n".join(parts), visible=True)
    except Exception as e:
        print(f"[SHOW_TOGGLE][ERROR] {e}")
        return gr.update(value=f"[ERROR] {e}", visible=True)
# -----------------------------
# 檢核前驗證 / Prompt 構建
# -----------------------------
def validate_before_run(
    policy_files: List[str],
    self_rules: str,
    self_text: str,
    self_files: List[str],
) -> Tuple[bool, str]:
    """Pre-flight validation before running a check.

    Requires: at least one policy file, non-empty rules, and either
    self-assessment text or at least one self-assessment attachment.

    Returns:
        (ok, newline-joined error messages; "" when ok).
    """
    try:
        problems = []
        has_policy = bool(policy_files)
        has_rules = bool(self_rules and self_rules.strip())
        has_self = bool(self_text and self_text.strip()) or bool(self_files)
        if not has_policy:
            problems.append("請上傳『單位適用內規(可多檔)』至少一個檔案。")
        if not has_rules:
            problems.append("請輸入『自評檢核規則』。")
        if not has_self:
            problems.append("請提供『單位自評』文字,或上傳『單位自評附檔(可多檔)』至少一個。")
        return not problems, "\n".join(problems)
    except Exception as e:
        print(f"[VALIDATE][ERROR] {e}")
        return False, f"驗證失敗:{e}"
def build_prompt(rules: str, doc_text: str) -> List[dict]:
    """Assemble the system/user chat messages for the compliance review.

    Falls back to a placeholder user message if formatting fails.
    """
    try:
        return [
            {"role": "system", "content": SYSTEM_MESSAGE},
            {
                "role": "user",
                "content": f"{DEMO_PROMPT_INSTRUCTIONS}\n\n[檢核規範]\n{rules}\n\n[文件全文]\n{doc_text}",
            },
        ]
    except Exception as e:
        print(f"[PROMPT][ERROR] {e}")
        return [{"role": "system", "content": SYSTEM_MESSAGE},
                {"role": "user", "content": "(構建 Prompt 失敗)"}]
# -----------------------------
# 主流程:執行檢核
# -----------------------------
def run_check_with_log(
    policy_files_paths,   # policy files, multi-upload (required)
    self_rules_text,      # self-assessment check rules (required; defaults to DEFAULT_RULES)
    self_text,            # self-assessment text (may be empty)
    self_files_paths      # self-assessment attachments, multi-upload (may be empty)
):
    """Main flow: validate inputs, gather text, call the LLM, shape outputs.

    Returns a 4-tuple for Gradio:
        (info markdown, pretty JSON string or None, table rows or None,
         newline-joined debug log).
    """
    logs: List[str] = []
    try:
        # Pre-flight validation.
        ok, msg = validate_before_run(policy_files_paths, self_rules_text, self_text, self_files_paths)
        if not ok:
            msg_display = msg.replace("\n", ";")
            logs.append(f"[VALIDATE] {msg_display}")
            return msg, None, None, "\n".join(logs)
        # Read policy files — currently only logged for audit trail; their
        # text is not fed into the prompt.
        for p in (policy_files_paths or []):
            t, info = _read_file_to_text(p)
            logs.append(f"[POLICY] {info}")
        # Self-assessment content: textbox takes priority; otherwise merge
        # the non-empty attachment texts.
        doc_text = ""
        if self_text and self_text.strip():
            doc_text = self_text.strip()
            logs.append(f"[SELF] from textbox, len={len(doc_text)}")
        else:
            pieces = []
            for p in (self_files_paths or []):
                t, info = _read_file_to_text(p)
                logs.append(f"[SELF_FILE] {info}")
                if t.strip():
                    pieces.append(t.strip())
            doc_text = "\n\n".join(pieces).strip()
        if not doc_text:
            return "未取得任何自評內容(文字與附檔皆為空)。", None, None, "\n".join(logs)
        # Conservative length cap — counted in characters, not model tokens.
        if len(doc_text) > MAX_TOKENS_INPUT:
            logs.append(f"[WARN] self text too long ({len(doc_text)}) → truncate to {MAX_TOKENS_INPUT}")
            doc_text = doc_text[:MAX_TOKENS_INPUT]
        # Build the chat messages and run the local LLM.
        msgs = build_prompt(self_rules_text.strip(), doc_text)
        logs.append("=== Prompt ===")
        logs.append(f"[system] len={len(msgs[0]['content']) if msgs else 0}")
        logs.append(f"[user] len={len(msgs[1]['content']) if len(msgs)>1 else 0}")
        result_dict = call_llm(msgs, model="local", logs=logs)
        logs.append("[LLM] done.")
        pretty = json.dumps(result_dict, ensure_ascii=False, indent=2)
        # One table row per per-rule verdict entry.
        table = [
            [i.get("規則編號",""), i.get("規則內容",""), i.get("判斷",""), i.get("理由",""), i.get("建議","")]
            for i in result_dict.get("規則逐點檢核", [])
        ]
        info = f"自評內容長度:{len(doc_text)};自評檢核規則長度:{len(self_rules_text.strip())}"
        return info, pretty, table, "\n".join(logs)
    except Exception as e:
        logs.append(f"[RUN][ERROR] {e}")
        return f"執行失敗:{e}", None, None, "\n".join(logs)
def toggle_debug(current_visibility: bool):
    """Flip Debug Log visibility; returns (component update, new state)."""
    try:
        return gr.update(visible=not current_visibility), not current_visibility
    except Exception as e:
        print(f"[DEBUG_TOGGLE][ERROR] {e}")
        return gr.update(), current_visibility
# -----------------------------
# Gradio 介面
# -----------------------------
with gr.Blocks(title="法遵自評覆核(PaddleOCR + LLaMA 本地)") as demo:
    gr.Markdown("# 法遵自評覆核(支援 PDF OCR:繁/簡中文;本地 LLaMA 推論)")
    gr.Markdown("步驟:上傳『單位適用內規(可多檔)』 → 輸入『自評檢核規則』(預設載入 config) → 輸入『單位自評』或上傳『自評附檔(可多檔)』 → 開始檢核。")
    # Policy files (multi) + summary list + per-file preview toggle + preview (hidden by default)
    with gr.Row():
        policy_files_in = gr.Files(
            label="上傳單位適用內規(可多檔,.txt 或 .pdf)",
            file_types=[".txt", ".pdf"],
            type="filepath"
        )
        policy_list_md = gr.Markdown("(尚未上傳任何檔案)")
    policy_show_chk = gr.CheckboxGroup(label="顯示哪些內規內容(個別開關,預設不勾選)", choices=[])
    policy_show_area = gr.Textbox(label="內規檔案內容預覽(僅顯示勾選者,可能截斷)", value="", lines=15, visible=False)
    policy_cache_state = gr.State({})  # {filename: text} cache for previews
    # Self-assessment rules (preloaded from config, editable, required)
    with gr.Row():
        self_rules_in = gr.Textbox(
            label="自評檢核規則(必填;預設載入自 config)",
            value=DEFAULT_RULES,
            lines=8,
            placeholder="請貼上要檢核自評內容的規則;例如:需包含抽檢紀錄、會辦流程、簽核證明等。"
        )
    # Self-assessment text (may be left empty in favor of attachments)
    with gr.Row():
        self_text_in = gr.Textbox(
            label="單位自評(文字輸入;可留白改上傳自評附檔)",
            value="",
            lines=10,
            placeholder="請輸入自評文字;或留白並在下方上傳自評附檔(可多檔)"
        )
    # Self-assessment attachments (multi) + list + preview toggle + preview (hidden)
    with gr.Row():
        self_files_in = gr.Files(
            label="上傳單位自評附檔(可多檔,.txt 或 .pdf)",
            file_types=[".txt", ".pdf"],
            type="filepath"
        )
        self_list_md = gr.Markdown("(尚未上傳任何檔案)")
    self_show_chk = gr.CheckboxGroup(label="顯示哪些自評附檔內容(個別開關,預設不勾選)", choices=[])
    self_show_area = gr.Textbox(label="自評附檔內容預覽(僅顯示勾選者,可能截斷)", value="", lines=15, visible=False)
    self_cache_state = gr.State({})  # {filename: text} cache for previews
    # Control row
    with gr.Row():
        run_btn = gr.Button("開始檢核", variant="primary", interactive=False)
        toggle_debug_btn = gr.Button("切換 Debug Log 顯示/隱藏")
    info_out = gr.Markdown(label="讀檔/輸入資訊")
    json_out = gr.Code(label="檢核結果(JSON)", language="json")
    table_out = gr.Dataframe(headers=["規則編號","規則內容","判斷","理由","建議"], wrap=True)
    debug_log = gr.Textbox(label="Debug Log", lines=14, visible=False)
    debug_state = gr.State(False)  # tracks Debug Log visibility
    # Policy files: list + checkbox + preview wiring
    policy_files_in.change(
        fn=on_files_change,
        inputs=[policy_files_in],
        outputs=[policy_list_md, policy_show_chk, policy_show_area, policy_cache_state]
    )
    policy_show_chk.change(
        fn=on_show_toggle,
        inputs=[policy_show_chk, policy_cache_state],
        outputs=[policy_show_area]
    )
    # Self-assessment attachments: list + checkbox + preview wiring
    self_files_in.change(
        fn=on_files_change,
        inputs=[self_files_in],
        outputs=[self_list_md, self_show_chk, self_show_area, self_cache_state]
    )
    self_show_chk.change(
        fn=on_show_toggle,
        inputs=[self_show_chk, self_cache_state],
        outputs=[self_show_area]
    )
    # Run-button enablement: policy file(s) + rules + (self text OR attachments)
    def _ready(policy_files, self_rules, self_text, self_files):
        """Enable the run button only when validate_before_run passes."""
        try:
            ok, _ = validate_before_run(policy_files, self_rules, self_text, self_files)
            return gr.update(interactive=ok)
        except Exception as e:
            print(f"[READY][ERROR] {e}")
            return gr.update(interactive=False)
    policy_files_in.change(_ready, [policy_files_in, self_rules_in, self_text_in, self_files_in], [run_btn])
    self_rules_in.change(_ready, [policy_files_in, self_rules_in, self_text_in, self_files_in], [run_btn])
    self_text_in.change(_ready, [policy_files_in, self_rules_in, self_text_in, self_files_in], [run_btn])
    self_files_in.change(_ready, [policy_files_in, self_rules_in, self_text_in, self_files_in], [run_btn])
    # Run
    run_btn.click(
        fn=run_check_with_log,
        inputs=[policy_files_in, self_rules_in, self_text_in, self_files_in],
        outputs=[info_out, json_out, table_out, debug_log]
    )
    toggle_debug_btn.click(fn=toggle_debug, inputs=[debug_state], outputs=[debug_log, debug_state])
if __name__ == "__main__":
    # Spaces requires binding 0.0.0.0; the same settings work locally.
    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")), share=False)