"""Legislation & FCA Ruling Manager.

A Gradio application that keeps a JSON rulebase of legislation sections,
asks an OpenAI-compatible chat endpoint to rule on an issue against those
rules, reads the ruling aloud via gTTS, logs every ruling, and can upload
both JSON files to a Hugging Face dataset repository.
"""

import hashlib
import json
import os
import traceback
from collections import Counter
from datetime import datetime

import docx
import gradio as gr
import requests
from bs4 import BeautifulSoup
from gtts import gTTS
from huggingface_hub import HfApi
from pypdf import PdfReader

DATA_FILE = "legislation_rules.json"
RULINGS_FILE = "rulings_log.json"


# --- CORE LOGIC ---

def load_data(file_path):
    """Load a JSON list from *file_path*.

    Returns an empty list when the file is missing, unreadable, not valid
    JSON, or does not contain a list (every caller appends to or iterates
    over the result, so a non-list payload is treated as unusable).
    """
    if not os.path.exists(file_path):
        return []
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (ValueError, OSError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Load error for {file_path}: {e}")
        return []
    return data if isinstance(data, list) else []


def save_data(data, file_path):
    """Serialize *data* as indented JSON to *file_path*.

    The payload is written to a sibling temp file and moved into place with
    os.replace so an interrupted write cannot leave a truncated store behind
    (load_data would silently treat a truncated file as empty).
    """
    tmp_path = f"{file_path}.tmp"
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)
    os.replace(tmp_path, file_path)


def get_canonical_hash(text):
    """Return the SHA-256 hex digest of *text* after stripping outer whitespace."""
    return hashlib.sha256(text.strip().encode('utf-8')).hexdigest()


def _resolve_upload_path(file_obj):
    """Return the filesystem path for an uploaded file, or "" if there is none.

    Accepts either an object exposing a ``name`` attribute or a plain path.
    """
    if file_obj is None:
        return ""
    candidate = file_obj.name if hasattr(file_obj, 'name') else file_obj
    if isinstance(candidate, (str, os.PathLike)):
        return os.fspath(candidate)
    return ""


def extract_text_from_any(file_obj):
    """Extract plain text from an uploaded .pdf, .docx or .txt file.

    Returns "" when no file is given, the path does not exist, the extension
    is unsupported, or extraction fails (the error is printed, not raised).
    """
    file_path = _resolve_upload_path(file_obj)
    if not file_path or not os.path.exists(file_path):
        return ""

    ext = os.path.splitext(file_path)[1].lower()
    text = ""
    try:
        if ext == ".pdf":
            # Extract each page once; pages yielding no text are skipped.
            page_texts = (page.extract_text() for page in PdfReader(file_path).pages)
            text = "\n".join(t for t in page_texts if t)
        elif ext == ".docx":
            text = "\n".join(p.text for p in docx.Document(file_path).paragraphs)
        elif ext == ".txt":
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                text = f.read()
    except Exception as e:
        # Best-effort extraction: callers treat "" as "nothing extracted".
        print(f"Extraction error: {e}")
    return text


def add_rule_manually(act, title, text, source):
    """Append a rule to the rulebase unless an identical text already exists.

    The rule's ``deterministic_id`` is the canonical hash of *text*; a rule
    whose id is already present is not added again. Returns a status message
    for display in the UI.
    """
    if not text or not text.strip():
        return "Error: Rule text is empty."

    data = load_data(DATA_FILE)
    det_id = get_canonical_hash(text)
    if any(r.get('deterministic_id') == det_id for r in data):
        return f"Warning: Rule '{title}' already exists."

    new_rule = {
        "act": act,
        "section_title": title,
        "text": text,
        "source_url": source,
        "deterministic_id": det_id,
        "added_at": datetime.now().isoformat(),
    }
    data.append(new_rule)
    save_data(data, DATA_FILE)
    return f"Successfully added: {title}"


def process_rule_document(file_obj, act_name):
    """Extract text from an uploaded document and store it as a single rule.

    The file's base name is used as both the rule title and its source label.
    """
    text = extract_text_from_any(file_obj)
    if not text:
        return "Error: Could not extract text from rule document."
    # Resolve the path the same way extract_text_from_any does, so a plain
    # string path works as well as an object with a .name attribute.
    file_name = os.path.basename(_resolve_upload_path(file_obj))
    return add_rule_manually(act_name, file_name, text, f"File: {file_name}")


def generate_tts(text):
    """Synthesize *text* to an MP3 file and return its filename.

    Markdown markers (#, *, `) are removed and only the first 1500 characters
    are spoken. Returns None for missing/very short text or on any TTS error.
    """
    if not text or len(text) < 5:
        return None
    try:
        clean_text = text.replace("#", "").replace("*", "").replace("`", "")
        tts = gTTS(text=clean_text[:1500], lang='en')
        filename = f"ruling_audio_{int(datetime.now().timestamp())}.mp3"
        tts.save(filename)
        return filename
    except Exception as e:
        print(f"TTS Error: {e}")
        return None


def _chat_completions_url(endpoint):
    """Build the chat-completions URL, tolerating a base that already ends in /v1."""
    base = endpoint.strip().rstrip('/')
    if base.endswith('/v1'):
        return f"{base}/chat/completions"
    return f"{base}/v1/chat/completions"


def rule_on_issue(issue_description, issue_file, llm_endpoint, llm_key, llm_model):
    """Produce, log and voice a ruling for the described issue.

    Returns a 3-tuple ``(ruling_text, audit_status, audio_file)`` matching the
    three Gradio outputs. On validation failure the first element is an error
    message, the second is "" and the third is None.
    """
    description = (issue_description or "").strip()
    file_text = extract_text_from_any(issue_file).strip()

    # Only include the parts that actually carry content, so that an entirely
    # empty submission is detected instead of being masked by the header.
    parts = []
    if description:
        parts.append(description)
    if file_text:
        parts.append(f"[EXTRACTED FROM DOCUMENT]:\n{file_text}")
    combined_issue = "\n\n".join(parts)
    if not combined_issue:
        return "Error: No issue description or document provided.", "", None

    rules = load_data(DATA_FILE)
    if not rules:
        return "Error: Dataset is empty. Add rules (CRA, FCA, etc.) first.", "", None

    # Prompt context is capped at the first 20 rules, 600 characters each.
    context = "\n".join(
        f"[{r['act']} - {r['section_title']}]: {r['text'][:600]}..."
        for r in rules[:20]
    )
    prompt = f"You are a Regulatory Ruling Engine.\nISSUE TO ANALYZE:\n{combined_issue}\nDETERMINISTIC REFERENCE RULES:\n{context}\nTASK:\n1. Identify applicable rules.\n2. Provide a 'Formal Ruling' (Compliant/Non-Compliant/Warning).\n3. Cite Rule Titles and Deterministic IDs.\n4. Provide forensic justification."

    ruling_text = "LLM Inference required for automated ruling."
    if llm_endpoint and llm_key:
        try:
            headers = {"Authorization": f"Bearer {llm_key}", "Content-Type": "application/json"}
            payload = {
                "model": llm_model,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0,
            }
            response = requests.post(
                _chat_completions_url(llm_endpoint),
                json=payload,
                headers=headers,
                timeout=45,
            )
            # Surface HTTP failures as such rather than as a KeyError on 'choices'.
            response.raise_for_status()
            content = response.json()['choices'][0]['message']['content']
            ruling_text = content or "Inference Error: empty response from model."
        except Exception as e:
            # UI boundary: report the failure as the ruling text instead of raising.
            ruling_text = f"Inference Error: {str(e)}"

    audio_file = generate_tts(ruling_text)

    rulings_log = load_data(RULINGS_FILE)
    new_ruling = {
        "timestamp": datetime.now().isoformat(),
        "issue_summary": combined_issue[:200],
        "ruling": ruling_text,
        "ruling_hash": hashlib.sha256(ruling_text.encode()).hexdigest(),
    }
    rulings_log.append(new_ruling)
    save_data(rulings_log, RULINGS_FILE)
    return ruling_text, f"Ruling Logged (ID: {new_ruling['ruling_hash'][:8]})", audio_file


def sync_all(token, dataset_id):
    """Upload the rulebase (and the rulings log, if present) to a HF dataset repo.

    Returns a status message; upload errors are reported, not raised.
    """
    if not token or not dataset_id:
        return "Error: Missing credentials."
    if not os.path.exists(DATA_FILE):
        return f"Sync Failed: {DATA_FILE} not found."

    api = HfApi()
    try:
        api.upload_file(path_or_fileobj=DATA_FILE, path_in_repo=DATA_FILE,
                        repo_id=dataset_id, repo_type="dataset", token=token)
        if os.path.exists(RULINGS_FILE):
            api.upload_file(path_or_fileobj=RULINGS_FILE, path_in_repo=RULINGS_FILE,
                            repo_id=dataset_id, repo_type="dataset", token=token)
        return "Full Sync Successful."
    except Exception as e:
        return f"Sync Failed: {str(e)}"


def view_stats():
    """Return a per-act rule count, one "act: count" line per act, or "Empty"."""
    data = load_data(DATA_FILE)
    if not data:
        return "Empty"
    acts = Counter(r.get('act') for r in data)
    return "\n".join(f"{k}: {v}" for k, v in acts.items())


# --- GRADIO UI ---

USER_MANUAL_MD = """
# ⚖️ Legislation & FCA Ruling Manager: User Manual

## 🛠️ Building Your Rulebase
1. **Manual Entry**: Go to **Manage Rules** to paste specific sections from acts.
2. **Document Ingestion**: Upload **PDF, DOCX, or TXT** files of internal policies or acts.
3. **FCA Automation**: Use the **FCA Guidelines** tab to pull the **12 Principles for Businesses (PRIN)**.

## 🏛️ Ruling on Issues
1. **Describing the Issue**: Go to **Rule on Issues**. Upload an **Issue Document** or type a description.
2. **Generating the Ruling**: Ensure your **Inference Settings** are configured, then click **Generate Formal Ruling**.
3. **Audible Delivery (TTS)**: Click play on the **Audio Player** to hear the ruling read aloud.

## ☁️ Sync & Audit
- Go to **Sync & Audit**, enter your **HF Write Token** and **Dataset ID**, and click **Sync Everything**.
"""

# NOTE(review): the original file's line breaks were lost, so the nesting of
# the layout containers below is reconstructed from component order — confirm
# against the intended layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ⚖️ Legislation & FCA Ruling Manager")

    with gr.Tab("🏛️ Rule on Issues"):
        gr.Markdown("### 📂 Issue Document Intake")
        with gr.Row():
            with gr.Column():
                issue_file = gr.File(label="Upload Issue Document (PDF/DOCX/TXT)", file_count="single")
                issue_desc = gr.TextArea(label="Additional Context / Issue Description")
                with gr.Accordion("⚙️ Inference Settings", open=False):
                    end = gr.Textbox(label="Endpoint", value="http://localhost:11434/v1")
                    key = gr.Textbox(label="Key", type="password")
                    mod = gr.Textbox(label="Model", value="llama3")
                rule_btn = gr.Button("Generate Formal Ruling", variant="primary")
            with gr.Column():
                ruling_out = gr.Markdown(label="Official Ruling")
                audio_out = gr.Audio(label="Audible Ruling", interactive=False)
                log_status = gr.Textbox(label="Audit Status")

    with gr.Tab("➕ Manage Rules"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("#### 📂 Add Legislation/Handbook via File")
                doc_act = gr.Textbox(label="Act/Source Name")
                doc_in = gr.File(label="Upload Rule Document")
                doc_btn = gr.Button("Process Rule")
                gr.Markdown("---")
                gr.Markdown("#### ✍️ Manual Rule Entry")
                m_act = gr.Textbox(label="Act")
                m_tit = gr.Textbox(label="Title")
                m_src = gr.Textbox(label="Source")
                m_txt = gr.TextArea(label="Text")
                m_btn = gr.Button("Add Rule")
            with gr.Column():
                op_status = gr.Textbox(label="Status")
                stats_out = gr.Textbox(label="Dataset Inventory", value=view_stats())
                refresh_btn = gr.Button("Refresh Inventory")

    with gr.Tab("🏦 FCA Guidelines"):
        gr.Markdown("### 🛠️ FCA Handbook Automation")
        # NOTE(review): fca_btn has no click handler in this file, so pressing
        # it does nothing and fca_status is never updated. The ingestion
        # function appears to be missing — TODO restore or implement it.
        fca_btn = gr.Button("Ingest FCA PRIN Principles", variant="secondary")
        fca_status = gr.Textbox(label="FCA Ingestion Status")

    with gr.Tab("☁️ Sync & Audit"):
        hf_t = gr.Textbox(label="HF Token", type="password")
        hf_d = gr.Textbox(label="Dataset ID")
        s_btn = gr.Button("Sync Everything to Dataset")
        s_out = gr.Textbox(label="Sync Status")

    with gr.Tab("📖 User Manual"):
        gr.Markdown(USER_MANUAL_MD)

    # Event wiring: output order must match each handler's return order.
    rule_btn.click(fn=rule_on_issue, inputs=[issue_desc, issue_file, end, key, mod],
                   outputs=[ruling_out, log_status, audio_out])
    doc_btn.click(fn=process_rule_document, inputs=[doc_in, doc_act], outputs=op_status)
    m_btn.click(fn=add_rule_manually, inputs=[m_act, m_tit, m_txt, m_src], outputs=op_status)
    refresh_btn.click(fn=view_stats, outputs=stats_out)
    s_btn.click(fn=sync_all, inputs=[hf_t, hf_d], outputs=s_out)


if __name__ == "__main__":
    demo.launch()