Spaces:
Sleeping
Sleeping
| # app.py | |
| # 法遵自評覆核(Hugging Face Spaces 版) | |
| # - PDF 讀取:先用 PyMuPDF 擷取文字;若該頁無文字則以 PaddleOCR(lang="ch")進行 OCR(含繁體) | |
| # - LLM:本地 Hugging Face Transformers(預設 meta-llama/Meta-Llama-3.1-8B-Instruct),首次自動下載到 /data/hf | |
| # - 自評檢核規則:預設從 config/config.json 的 DEFAULT_RULES 載入(可在 UI 中編輯) | |
| # - 每個函式皆加上 try/except 與註解;重要步驟 print log 便於除錯 | |
| # - UI:內規多檔上傳(可預覽)、自評規則(必填)、自評文字或自評附檔(二擇一)、Debug Log 顯示切換 | |
| import os | |
| import io | |
| import re | |
| import json | |
| import time | |
| from typing import List, Dict, Tuple, Optional | |
| import gradio as gr | |
| import fitz # PyMuPDF | |
| import numpy as np | |
| from PIL import Image | |
# HF cache locations (persisted after the first model download).
os.environ.setdefault("TRANSFORMERS_CACHE", "/data/hf")
os.environ.setdefault("HF_HOME", "/data/hf")
# -----------------------------
# Configuration loading (with defaults and error handling)
# -----------------------------
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# NOTE(review): directory is "Config" here but the file header says
# "config/config.json" — verify the case on a case-sensitive filesystem.
CONFIG_PATH = os.path.join(BASE_DIR, "Config", "config.json")
# Fallback defaults (used when config.json is missing or unreadable).
_default_cfg = {
    "DEFAULT_RULES": "【請在此貼上你的檢核規則】",
    "DEMO_PROMPT_INSTRUCTIONS": (
        "你是一位法遵/合規覆核專家。請依『檢核規範』逐點比對『文件全文』,並嚴格以 JSON 回覆:"
        '{"符合情況":"符合|不符合|部分符合","原因":[],"改進建議":[],"規則逐點檢核":[{"規則編號":"","規則內容":"",'
        '"判斷":"符合|不符合|部分符合","理由":"","建議":""}]}'
    ),
    "SYSTEM_MESSAGE": "你是嚴謹的法遵覆核專家,請以審計可追溯為原則回覆並僅輸出 JSON。",
    "temperature": 0.2,
    # Interpreted as a CHARACTER budget downstream (see run_check_with_log),
    # despite the "tokens" name.
    "MAX_TOKENS_INPUT": 100000
}
try:
    with open(CONFIG_PATH, "r", encoding="utf-8") as f:
        cfg = json.load(f) or {}
    print(f"[CONFIG] Loaded: {CONFIG_PATH}")
except FileNotFoundError:
    print(f"[CONFIG][WARN] {CONFIG_PATH} not found, using defaults.")
    cfg = dict(_default_cfg)
except Exception as e:
    print(f"[CONFIG][ERROR] Failed to load config.json: {e}. Using defaults.")
    cfg = dict(_default_cfg)
# Derived settings: each falls back to the default when missing/empty.
DEFAULT_RULES = (cfg.get("DEFAULT_RULES") or _default_cfg["DEFAULT_RULES"]).strip()
DEMO_PROMPT_INSTRUCTIONS = (cfg.get("DEMO_PROMPT_INSTRUCTIONS") or _default_cfg["DEMO_PROMPT_INSTRUCTIONS"]).strip()
SYSTEM_MESSAGE = (cfg.get("SYSTEM_MESSAGE") or _default_cfg["SYSTEM_MESSAGE"]).strip()
TEMPERATURE = float(cfg.get("temperature", _default_cfg["temperature"]))
MAX_TOKENS_INPUT = int(cfg.get("MAX_TOKENS_INPUT", _default_cfg["MAX_TOKENS_INPUT"]))
| # ----------------------------- | |
| # PaddleOCR 初始化(lang='ch' 同時支援簡/繁) | |
| # ----------------------------- | |
| import torch | |
| from paddleocr import PaddleOCR | |
| OCR = None | |
def _init_ocr() -> Optional[PaddleOCR]:
    """Create a PaddleOCR engine (lang='ch' covers Simplified and Traditional).

    GPU use is auto-detected via torch. Returns None on failure so callers
    can degrade gracefully to "no OCR available".
    """
    try:
        print("[OCR] Initializing PaddleOCR (lang='ch', PP-OCRv4)")
        engine = PaddleOCR(
            lang="ch",
            use_angle_cls=True,
            use_gpu=torch.cuda.is_available(),
            ocr_version="PP-OCRv4",
            show_log=False,
        )
        print("[OCR] Ready.")
        return engine
    except Exception as exc:
        print(f"[OCR][ERROR] init failed: {exc}")
        return None
| # ----------------------------- | |
| # 本地 LLaMA(Transformers) | |
| # ----------------------------- | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| LOCAL_MODEL_ID = os.getenv("LOCAL_MODEL_ID", "meta-llama/Meta-Llama-3.1-8B-Instruct") | |
| HF_TOKEN = os.getenv("HF_TOKEN", None) | |
| _hf_tok = None | |
| _hf_model = None | |
def _ensure_local_model(logs: Optional[List[str]] = None) -> None:
    """Lazily load the local tokenizer/model pair (module-level singletons).

    First call downloads into TRANSFORMERS_CACHE; later calls are no-ops.
    On failure the error is recorded in ``logs`` (when given) and re-raised.
    """
    global _hf_tok, _hf_model
    try:
        if _hf_tok is not None and _hf_model is not None:
            return  # already loaded — nothing to do
        if logs is not None:
            logs.append(f"[LOCAL LLM] Loading model: {LOCAL_MODEL_ID}")
        print(f"[LLM] Loading {LOCAL_MODEL_ID} (cache={os.environ.get('TRANSFORMERS_CACHE')})")
        cache_dir = os.environ["TRANSFORMERS_CACHE"]
        _hf_tok = AutoTokenizer.from_pretrained(
            LOCAL_MODEL_ID,
            use_fast=True,
            cache_dir=cache_dir,
            token=HF_TOKEN,
        )
        # bfloat16 on GPU, float32 on CPU; device_map="auto" places layers.
        dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
        _hf_model = AutoModelForCausalLM.from_pretrained(
            LOCAL_MODEL_ID,
            torch_dtype=dtype,
            device_map="auto",
            cache_dir=cache_dir,
            token=HF_TOKEN,
        )
        if logs is not None:
            logs.append("[LOCAL LLM] Model ready.")
        print("[LLM] Ready.")
    except Exception as e:
        if logs is not None:
            logs.append(f"[LOCAL LLM][ERROR] load failed: {e}")
        print(f"[LLM][ERROR] load failed: {e}")
        raise
| # ---------- Robust JSON parsing helpers ---------- | |
| def _strip_code_fences(s: str) -> str: | |
| s = s.strip() | |
| if s.startswith("```"): | |
| s = s[3:] | |
| if "```" in s: | |
| s = s.split("```", 1)[0] | |
| s = s.replace("```json", "").replace("```JSON", "").strip("` \n\r\t") | |
| return s | |
| def _extract_first_brace_block(s: str) -> str: | |
| start = s.find("{") | |
| if start == -1: | |
| return s | |
| depth = 0 | |
| for i in range(start, len(s)): | |
| if s[i] == "{": | |
| depth += 1 | |
| elif s[i] == "}": | |
| depth -= 1 | |
| if depth == 0: | |
| return s[start:i+1] | |
| return s | |
def safe_parse_json(text: str) -> dict:
    """Parse *text* as JSON with increasingly forgiving fallbacks.

    Attempt order:
      1. strict ``json.loads`` on the raw text
      2. strip code fences, take the first balanced ``{...}`` block, retry
      3. ``json5`` (single quotes / trailing commas) when installed
      4. normalize curly quotes and BOM; as a last resort convert an
         all-single-quote payload to double quotes

    Raises the final ``json.loads`` error when every attempt fails.
    """
    # Fast path: the text is already valid JSON.
    try:
        return json.loads(text)
    except Exception:
        pass
    candidate = _extract_first_brace_block(_strip_code_fences(text))
    try:
        return json.loads(candidate)
    except Exception:
        pass
    # Optional third-party parser; silently skipped when not installed.
    try:
        import json5  # type: ignore
        return json5.loads(candidate)
    except Exception:
        pass
    # Repair fancy quotes and a stray BOM, then try one last time.
    cleaned = (
        candidate.replace("\u201c", '"').replace("\u201d", '"')
        .replace("\u2018", "'").replace("\u2019", "'")
        .replace("\ufeff", "")
        .strip()
    )
    if "'" in cleaned and '"' not in cleaned:
        cleaned = cleaned.replace("'", '"')
    return json.loads(cleaned)
def extract_model_reply(full_text, prompt):
    """Strip an echoed prompt / role preamble from a model's raw output.

    Finds the LAST occurrence of any known role marker (or the prompt
    itself) and returns everything after it; falls back to the stripped
    full text on error.

    Fix: the old code always skipped ``len(markers[-1])`` characters (the
    prompt's length) even when a different marker matched last, which
    chopped or duplicated part of the reply.
    """
    try:
        markers = ["user", "User", "使用者", prompt.strip()]
        last_pos = -1
        last_len = 0
        for m in markers:
            pos = full_text.rfind(m)
            if pos > last_pos:
                # Remember the matched marker's own length so we skip
                # exactly that marker, not the final one in the list.
                last_pos = pos
                last_len = len(m)
        reply = full_text[last_pos + last_len:] if last_pos != -1 else full_text
        return reply.strip()
    except Exception as e:
        print(f"[extract_model_reply 錯誤] {e}")
        return full_text.strip()
| # === 放在 safe_parse_json 之後:用「正則」擷取 full_text 中最後一個完整 JSON 物件 === | |
| try: | |
| import regex as re2 # 第三方 regex,支援遞迴 (?R) | |
| except Exception: | |
| re2 = None | |
def extract_last_json_block(text: str) -> Optional[str]:
    """Return the LAST balanced ``{...}`` JSON object found in *text*.

    Prefers the third-party ``regex`` module's recursive pattern ``(?R)``
    to match balanced braces; falls back to a manual depth-counting scan
    when ``regex`` is unavailable. Returns None when no complete object
    exists or an unexpected error occurs.
    """
    try:
        cleaned = _strip_code_fences(text)
        if re2 is not None:
            # Recursive pattern: a brace block of non-braces or nested blocks.
            balanced = re2.compile(r"\{(?:[^{}]|(?R))*\}", flags=re2.DOTALL)
            found = [m.group(0) for m in balanced.finditer(cleaned)]
            return found[-1] if found else None
        # Fallback: record every top-level balanced block while scanning.
        found = []
        depth, begin = 0, None
        for idx, ch in enumerate(cleaned):
            if ch == "{":
                if depth == 0:
                    begin = idx
                depth += 1
            elif ch == "}" and depth > 0:
                depth -= 1
                if depth == 0 and begin is not None:
                    found.append(cleaned[begin:idx + 1])
                    begin = None
        return found[-1] if found else None
    except Exception as e:
        print(f"[JSON-EXTRACT][ERROR] {e}")
        return None
def call_llm(messages: List[dict], model: str, logs: List[str]) -> dict:
    """Run the local LLaMA on chat *messages* and return the parsed JSON verdict.

    Keeps the historical name ``call_llm`` but runs locally via Transformers.
    The model is told to emit exactly one JSON object; the last balanced
    ``{...}`` block is extracted from the generation and parsed robustly.
    On any failure a structured fallback dict (符合情況=部分符合) is returned
    so the UI always receives a well-formed result. ``model`` is accepted
    for API compatibility but unused. Elapsed time is always appended to
    ``logs``.

    Fixes: removed the unreachable trailing ``return data`` (both inner
    branches already return; the name could even be unbound there), and the
    outer exception handler no longer mislabels model errors as JSON
    parse failures.
    """
    start_time = time.time()
    try:
        _ensure_local_model(logs)
        # Build the chat prompt and append a strict JSON-only constraint.
        sys_txt = messages[0].get("content", "") if messages else ""
        usr_txt = messages[1].get("content", "") if len(messages) > 1 else ""
        extra_rules = "\n\n請務必只輸出單一 JSON 物件,不得包含任何 JSON 之外的文字或符號。"
        print('準備 chat prompt(加上 JSON 輸出約束)')
        chat = [
            {"role": "system", "content": sys_txt},
            {"role": "user", "content": usr_txt + extra_rules}
        ]
        print(f"user content:{usr_txt + extra_rules}")
        prompt = _hf_tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
        inputs = _hf_tok(prompt, return_tensors="pt").to(_hf_model.device)
        print("inputs")
        with torch.no_grad():
            out_ids = _hf_model.generate(
                **inputs,
                max_new_tokens=1024,
                temperature=float(TEMPERATURE),
                do_sample=(float(TEMPERATURE) > 0),  # greedy when temperature == 0
                eos_token_id=_hf_tok.eos_token_id,
                pad_token_id=_hf_tok.eos_token_id
            )
        print("torch.no_grad")
        full_text = _hf_tok.decode(out_ids[0], skip_special_tokens=True)
        # Pull the LAST complete JSON object from the generation; fall back
        # to the raw text (safe_parse_json will still try its best).
        candidate = extract_last_json_block(full_text)
        gen_text = candidate if candidate is not None else full_text
        logs.append(f"[LOCAL LLM] raw_len={len(gen_text)}")
        logs.append(f"[LOCAL LLM] gen_text={gen_text}")
        logs.append(f"[LOCAL LLM] prompt={prompt}")
        logs.append(f"[LOCAL LLM] full_text={full_text}")
        # Robust parse with a structured fallback on failure.
        try:
            data = safe_parse_json(gen_text)
            logs.append("[LOCAL LLM] JSON 解析成功")
            return data
        except Exception as jerr:
            logs.append(f"[LOCAL LLM] JSON 解析失敗:{jerr}")
            return {
                "符合情況": "部分符合",
                "原因": [f"模型輸出非合法 JSON:{str(jerr)}"],
                "改進建議": ["請調整提示詞,要求嚴格輸出 JSON(雙引號、無註解、無多餘文字)。"],
                "規則逐點檢核": []
            }
    except Exception as e:
        # Model-loading / generation failure (not a JSON-parsing problem).
        logs.append(f"[LOCAL LLM][ERROR] 本地模型錯誤:{e}")
        return {
            "符合情況": "部分符合",
            "原因": [f"本地模型錯誤:{e}"],
            "改進建議": ["請檢查 Hugging Face 權杖與模型權限、或改用較小模型。"],
            "規則逐點檢核": []
        }
    finally:
        elapsed = time.time() - start_time
        logs.append(f"[LOCAL LLM] 本次執行耗時:{elapsed:.2f} 秒")
| # ----------------------------- | |
| # 檔案讀取:TXT / PDF(含 OCR fallback) | |
| # ----------------------------- | |
| def _read_text_file(path: str) -> str: | |
| """讀取 TXT:嘗試多種常見編碼(台灣環境可能遇到 cp950/big5)。""" | |
| try: | |
| print(f"[READ] TXT: {os.path.basename(path)}") | |
| encodings = ("utf-8", "utf-8-sig", "cp950", "big5", "latin1") | |
| for enc in encodings: | |
| try: | |
| with open(path, "r", encoding=enc, errors="ignore") as f: | |
| return f.read() | |
| except Exception: | |
| continue | |
| with open(path, "rb") as f: | |
| return f.read().decode("utf-8", errors="ignore") | |
| except Exception as e: | |
| print(f"[READ][ERROR] TXT {path}: {e}") | |
| return "" | |
def _ensure_ocr_ready():
    """Lazily create the module-level OCR engine on first use (saves resources)."""
    global OCR
    OCR = OCR if OCR is not None else _init_ocr()
def _ocr_page_text(page) -> str:
    """OCR a PDF page: rasterize at 240 dpi, run PaddleOCR, join line texts.

    Returns "" when the OCR engine is unavailable, the page yields no
    recognitions, or any step fails.
    """
    try:
        _ensure_ocr_ready()
        if OCR is None:
            return ""  # engine failed to initialize — treat page as empty
        pixmap = page.get_pixmap(dpi=240)
        image = Image.open(io.BytesIO(pixmap.tobytes("png"))).convert("RGB")
        result = OCR.ocr(np.array(image), cls=True)
        if not result or not result[0]:
            return ""
        recognized = [entry[1][0] for entry in result[0]]
        return "\n".join(recognized).strip()
    except Exception as e:
        print(f"[OCR][ERROR] {e}")
        return ""
def _read_pdf_text(path: str) -> Tuple[str, int]:
    """Extract text from a PDF, OCR-ing pages that look like scanned images.

    A page with fewer than 20 extracted characters is assumed to be an
    image-only page and is routed through OCR instead.
    Returns (full_text, page_count); ("", 0) on failure.
    """
    try:
        print(f"[READ] PDF: {os.path.basename(path)}")
        page_texts: List[str] = []
        with fitz.open(path) as doc:
            for page in doc:
                extracted = (page.get_text("text") or "").strip()
                # Below the threshold → assume a scanned page, fall back to OCR.
                page_texts.append(
                    extracted if len(extracted) >= 20 else _ocr_page_text(page)
                )
            page_count = len(doc)
        return "\n".join(page_texts).strip(), page_count
    except Exception as e:
        print(f"[READ][ERROR] PDF {path}: {e}")
        return "", 0
| def _read_file_to_text(file_path: Optional[str]) -> Tuple[str, str]: | |
| """統一入口:根據副檔名分派至 TXT/PDF 讀取函式。""" | |
| try: | |
| if not file_path: | |
| raise ValueError("未收到檔案路徑") | |
| file_path = os.fspath(file_path) | |
| if not os.path.exists(file_path): | |
| raise FileNotFoundError(f"檔案不存在:{file_path}") | |
| fname = os.path.basename(file_path) | |
| ext = os.path.splitext(fname)[1].lower() | |
| if ext == ".txt": | |
| content = _read_text_file(file_path) | |
| return content, f"讀取 TXT:{fname}(長度:{len(content)})" | |
| elif ext == ".pdf": | |
| content, pages = _read_pdf_text(file_path) | |
| return content, f"讀取 PDF:{fname}(頁數:{pages};長度:{len(content)})" | |
| else: | |
| raise ValueError("僅支援 .txt 或 .pdf") | |
| except Exception as e: | |
| print(f"[READ][ERROR] {e}") | |
| return "", f"[ERROR] {e}" | |
| # ----------------------------- | |
| # 多檔輔助:清單摘要 / 快取與預覽 | |
| # ----------------------------- | |
| def _read_multi(paths: Optional[List[str]]) -> Dict[str, str]: | |
| """批次讀取多檔,回傳 {檔名: 文字內容},忽略讀取錯誤。""" | |
| cache: Dict[str, str] = {} | |
| try: | |
| if not paths: | |
| return cache | |
| for p in paths: | |
| if not p: | |
| continue | |
| try: | |
| name = os.path.basename(os.fspath(p)) | |
| text, _ = _read_file_to_text(p) | |
| cache[name] = text | |
| except Exception as e: | |
| print(f"[READ][WARN] skip {p}: {e}") | |
| continue | |
| return cache | |
| except Exception as e: | |
| print(f"[READ_MULTI][ERROR] {e}") | |
| return cache | |
| def _summarize_paths(paths: Optional[List[str]]) -> str: | |
| """列出上傳檔案清單(含大小),供 UI 顯示。""" | |
| try: | |
| if not paths: | |
| return "(尚未上傳任何檔案)" | |
| lines = [] | |
| for p in paths: | |
| try: | |
| name = os.path.basename(os.fspath(p)) | |
| size = os.path.getsize(os.fspath(p)) | |
| lines.append(f"- {name}({size} bytes)") | |
| except Exception: | |
| lines.append(f"- {os.path.basename(str(p))}") | |
| return "\n".join(lines) | |
| except Exception as e: | |
| print(f"[SUMMARY][ERROR] {e}") | |
| return "(清單生成失敗)" | |
def on_files_change(paths: Optional[List[str]]):
    """Gradio callback for upload changes.

    Refreshes: the file listing markdown, the preview checkbox choices
    (cleared), the preview textbox (emptied and hidden), and the
    {filename: text} cache stored in gr.State.
    """
    try:
        cache = _read_multi(paths)
        return (
            _summarize_paths(paths),
            gr.update(choices=list(cache.keys()), value=[]),
            gr.update(value="", visible=False),
            cache,
        )
    except Exception as e:
        print(f"[FILES_CHANGE][ERROR] {e}")
        return "(更新失敗)", gr.update(), gr.update(), {}
def on_show_toggle(selected_names: List[str], cache: Dict[str, str]):
    """Gradio callback: show merged content of the checked filenames.

    Hides the preview when nothing is selected; long files are truncated
    to a limit derived from MAX_TOKENS_INPUT (at least 2000 chars).
    """
    try:
        if not selected_names:
            return gr.update(value="", visible=False)
        limit = max(2000, MAX_TOKENS_INPUT // 5)
        sections = []
        for name in selected_names:
            body = cache.get(name, "")
            if len(body) > limit:
                body = body[:limit] + "\n...[內容過長,已截斷顯示]"
            sections.append(f"===== {name} =====\n{body}")
        return gr.update(value="\n\n".join(sections), visible=True)
    except Exception as e:
        print(f"[SHOW_TOGGLE][ERROR] {e}")
        return gr.update(value=f"[ERROR] {e}", visible=True)
| # ----------------------------- | |
| # 檢核前驗證 / Prompt 構建 | |
| # ----------------------------- | |
def validate_before_run(
    policy_files: List[str],
    self_rules: str,
    self_text: str,
    self_files: List[str],
) -> Tuple[bool, str]:
    """Pre-run validation of the UI inputs.

    Requires: at least one policy file, non-blank rules, and a
    self-assessment supplied either as text or as >= 1 attachment.
    Returns (ok, newline-joined error messages — empty when ok).
    """
    try:
        errors = []
        if not policy_files:
            errors.append("請上傳『單位適用內規(可多檔)』至少一個檔案。")
        if not (self_rules and self_rules.strip()):
            errors.append("請輸入『自評檢核規則』。")
        has_text = bool(self_text and self_text.strip())
        has_files = bool(self_files)
        if not has_text and not has_files:
            errors.append("請提供『單位自評』文字,或上傳『單位自評附檔(可多檔)』至少一個。")
        return not errors, "\n".join(errors)
    except Exception as e:
        print(f"[VALIDATE][ERROR] {e}")
        return False, f"驗證失敗:{e}"
def build_prompt(rules: str, doc_text: str) -> List[dict]:
    """Build the chat messages from the check rules and the document text.

    On failure returns a placeholder user message so the pipeline can
    still proceed and surface the problem downstream.
    """
    try:
        user_content = (
            f"{DEMO_PROMPT_INSTRUCTIONS}\n\n[檢核規範]\n{rules}\n\n[文件全文]\n{doc_text}"
        )
        return [
            {"role": "system", "content": SYSTEM_MESSAGE},
            {"role": "user", "content": user_content},
        ]
    except Exception as e:
        print(f"[PROMPT][ERROR] {e}")
        return [{"role": "system", "content": SYSTEM_MESSAGE},
                {"role": "user", "content": "(構建 Prompt 失敗)"}]
| # ----------------------------- | |
| # 主流程:執行檢核 | |
| # ----------------------------- | |
def run_check_with_log(
    policy_files_paths,  # policy files, multiple allowed (required)
    self_rules_text,     # self-assessment check rules (required; defaults to DEFAULT_RULES)
    self_text,           # self-assessment free text (may be empty)
    self_files_paths     # self-assessment attachments, multiple allowed (may be empty)
):
    """Main pipeline: validate inputs, gather the self-assessment text, call
    the local LLM, and return (info_markdown, pretty_json, table_rows, log).

    On validation failure or empty input, returns the message with None for
    the JSON/table outputs; never raises (errors become the info string).
    """
    logs: List[str] = []
    try:
        # Up-front validation; bail out with the joined error messages.
        ok, msg = validate_before_run(policy_files_paths, self_rules_text, self_text, self_files_paths)
        if not ok:
            msg_display = msg.replace("\n", ";")
            logs.append(f"[VALIDATE] {msg_display}")
            return msg, None, None, "\n".join(logs)
        # Read the policy files — currently only logged as an audit trail;
        # their text is not fed to the LLM.
        for p in (policy_files_paths or []):
            t, info = _read_file_to_text(p)
            logs.append(f"[POLICY] {info}")
        # Collect the self-assessment: the textbox wins; otherwise merge
        # all non-empty attachment texts.
        doc_text = ""
        if self_text and self_text.strip():
            doc_text = self_text.strip()
            logs.append(f"[SELF] from textbox, len={len(doc_text)}")
        else:
            pieces = []
            for p in (self_files_paths or []):
                t, info = _read_file_to_text(p)
                logs.append(f"[SELF_FILE] {info}")
                if t.strip():
                    pieces.append(t.strip())
            doc_text = "\n\n".join(pieces).strip()
        if not doc_text:
            return "未取得任何自評內容(文字與附檔皆為空)。", None, None, "\n".join(logs)
        # Conservative length cap — character count used as a token proxy.
        if len(doc_text) > MAX_TOKENS_INPUT:
            logs.append(f"[WARN] self text too long ({len(doc_text)}) → truncate to {MAX_TOKENS_INPUT}")
            doc_text = doc_text[:MAX_TOKENS_INPUT]
        # Build the chat messages and run the local LLM.
        msgs = build_prompt(self_rules_text.strip(), doc_text)
        logs.append("=== Prompt ===")
        logs.append(f"[system] len={len(msgs[0]['content']) if msgs else 0}")
        logs.append(f"[user] len={len(msgs[1]['content']) if len(msgs)>1 else 0}")
        result_dict = call_llm(msgs, model="local", logs=logs)
        logs.append("[LLM] done.")
        pretty = json.dumps(result_dict, ensure_ascii=False, indent=2)
        # Flatten the per-rule findings into rows for the Dataframe output.
        table = [
            [i.get("規則編號",""), i.get("規則內容",""), i.get("判斷",""), i.get("理由",""), i.get("建議","")]
            for i in result_dict.get("規則逐點檢核", [])
        ]
        info = f"自評內容長度:{len(doc_text)};自評檢核規則長度:{len(self_rules_text.strip())}"
        return info, pretty, table, "\n".join(logs)
    except Exception as e:
        logs.append(f"[RUN][ERROR] {e}")
        return f"執行失敗:{e}", None, None, "\n".join(logs)
def toggle_debug(current_visibility: bool):
    """Flip the Debug Log visibility; returns (component update, new state)."""
    try:
        flipped = not current_visibility
        return gr.update(visible=flipped), flipped
    except Exception as e:
        print(f"[DEBUG_TOGGLE][ERROR] {e}")
        return gr.update(), current_visibility
| # ----------------------------- | |
| # Gradio 介面 | |
| # ----------------------------- | |
with gr.Blocks(title="法遵自評覆核(PaddleOCR + LLaMA 本地)") as demo:
    gr.Markdown("# 法遵自評覆核(支援 PDF OCR:繁/簡中文;本地 LLaMA 推論)")
    gr.Markdown("步驟:上傳『單位適用內規(可多檔)』 → 輸入『自評檢核規則』(預設載入 config) → 輸入『單位自評』或上傳『自評附檔(可多檔)』 → 開始檢核。")
    # Policy files (multi-upload) + listing + per-file preview toggles
    # (preview hidden by default).
    with gr.Row():
        policy_files_in = gr.Files(
            label="上傳單位適用內規(可多檔,.txt 或 .pdf)",
            file_types=[".txt", ".pdf"],
            type="filepath"
        )
    policy_list_md = gr.Markdown("(尚未上傳任何檔案)")
    policy_show_chk = gr.CheckboxGroup(label="顯示哪些內規內容(個別開關,預設不勾選)", choices=[])
    policy_show_area = gr.Textbox(label="內規檔案內容預覽(僅顯示勾選者,可能截斷)", value="", lines=15, visible=False)
    policy_cache_state = gr.State({})  # {filename: text} cache for previews
    # Self-assessment rules (loaded from config; editable; required).
    with gr.Row():
        self_rules_in = gr.Textbox(
            label="自評檢核規則(必填;預設載入自 config)",
            value=DEFAULT_RULES,
            lines=8,
            placeholder="請貼上要檢核自評內容的規則;例如:需包含抽檢紀錄、會辦流程、簽核證明等。"
        )
    # Self-assessment free text (optional — attachments may be used instead).
    with gr.Row():
        self_text_in = gr.Textbox(
            label="單位自評(文字輸入;可留白改上傳自評附檔)",
            value="",
            lines=10,
            placeholder="請輸入自評文字;或留白並在下方上傳自評附檔(可多檔)"
        )
    # Self-assessment attachments (multi-upload) + listing + preview toggles.
    with gr.Row():
        self_files_in = gr.Files(
            label="上傳單位自評附檔(可多檔,.txt 或 .pdf)",
            file_types=[".txt", ".pdf"],
            type="filepath"
        )
    self_list_md = gr.Markdown("(尚未上傳任何檔案)")
    self_show_chk = gr.CheckboxGroup(label="顯示哪些自評附檔內容(個別開關,預設不勾選)", choices=[])
    self_show_area = gr.Textbox(label="自評附檔內容預覽(僅顯示勾選者,可能截斷)", value="", lines=15, visible=False)
    self_cache_state = gr.State({})  # {filename: text} cache for previews
    # Control row: run (disabled until inputs validate) + debug toggle.
    with gr.Row():
        run_btn = gr.Button("開始檢核", variant="primary", interactive=False)
        toggle_debug_btn = gr.Button("切換 Debug Log 顯示/隱藏")
    info_out = gr.Markdown(label="讀檔/輸入資訊")
    json_out = gr.Code(label="檢核結果(JSON)", language="json")
    table_out = gr.Dataframe(headers=["規則編號","規則內容","判斷","理由","建議"], wrap=True)
    debug_log = gr.Textbox(label="Debug Log", lines=14, visible=False)
    debug_state = gr.State(False)  # tracks Debug Log visibility
    # Policy files: wire upload → listing/choices/preview/cache.
    policy_files_in.change(
        fn=on_files_change,
        inputs=[policy_files_in],
        outputs=[policy_list_md, policy_show_chk, policy_show_area, policy_cache_state]
    )
    policy_show_chk.change(
        fn=on_show_toggle,
        inputs=[policy_show_chk, policy_cache_state],
        outputs=[policy_show_area]
    )
    # Self-assessment attachments: same wiring as policy files.
    self_files_in.change(
        fn=on_files_change,
        inputs=[self_files_in],
        outputs=[self_list_md, self_show_chk, self_show_area, self_cache_state]
    )
    self_show_chk.change(
        fn=on_show_toggle,
        inputs=[self_show_chk, self_cache_state],
        outputs=[self_show_area]
    )
    # Run button enabled only when: policy files (>=1) + rules (non-blank)
    # + self-assessment (text or files).
    def _ready(policy_files, self_rules, self_text, self_files):
        """Recompute the run button's interactivity from the current inputs."""
        try:
            ok, _ = validate_before_run(policy_files, self_rules, self_text, self_files)
            return gr.update(interactive=ok)
        except Exception as e:
            print(f"[READY][ERROR] {e}")
            return gr.update(interactive=False)
    policy_files_in.change(_ready, [policy_files_in, self_rules_in, self_text_in, self_files_in], [run_btn])
    self_rules_in.change(_ready, [policy_files_in, self_rules_in, self_text_in, self_files_in], [run_btn])
    self_text_in.change(_ready, [policy_files_in, self_rules_in, self_text_in, self_files_in], [run_btn])
    self_files_in.change(_ready, [policy_files_in, self_rules_in, self_text_in, self_files_in], [run_btn])
    # Run the full check pipeline.
    run_btn.click(
        fn=run_check_with_log,
        inputs=[policy_files_in, self_rules_in, self_text_in, self_files_in],
        outputs=[info_out, json_out, table_out, debug_log]
    )
    toggle_debug_btn.click(fn=toggle_debug, inputs=[debug_state], outputs=[debug_log, debug_state])
if __name__ == "__main__":
    # Spaces requires binding 0.0.0.0; the same settings also work locally.
    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")), share=False)