Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import json | |
| import hashlib | |
| import os | |
| import requests | |
| from bs4 import BeautifulSoup | |
| from datetime import datetime | |
| from huggingface_hub import HfApi | |
| from pypdf import PdfReader | |
| import docx | |
| import traceback | |
| from gtts import gTTS | |
# JSON store of rule records; written by add_rule_manually.
DATA_FILE = "legislation_rules.json"
# JSON store of ruling records; appended to by rule_on_issue.
RULINGS_FILE = "rulings_log.json"

# --- CORE LOGIC ---
def load_data(file_path):
    """Load the JSON list stored at ``file_path``.

    Returns an empty list when the file is missing, cannot be read, is not
    valid JSON, or does not hold a JSON array. Callers append to and iterate
    over the result, so only a list is ever handed back.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        # A store that has not been created yet is the normal first-run case.
        return []
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        print(f"Load error for {file_path}: {e}")
        return []
    return data if isinstance(data, list) else []
def save_data(data, file_path):
    """Serialise ``data`` as indented JSON to ``file_path``.

    The payload is first written to a sibling ``.tmp`` file and then moved
    into place with ``os.replace``, so a failed or interrupted write never
    leaves a truncated store behind. Serialisation errors propagate to the
    caller and the previous file content is left untouched.
    """
    tmp_path = f"{file_path}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)
        os.replace(tmp_path, file_path)
    finally:
        # Only present if the write or the replace failed part-way.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
def get_canonical_hash(text):
    """Return the SHA-256 hex digest of ``text`` with surrounding whitespace removed."""
    normalized = text.strip()
    digest = hashlib.sha256()
    digest.update(normalized.encode('utf-8'))
    return digest.hexdigest()
def extract_text_from_any(file_obj):
    """Return the plain text of a PDF, DOCX or TXT file.

    ``file_obj`` may be a path string or any object exposing the path as
    ``.name``. Returns ``""`` when nothing was supplied, the path does not
    exist, the extension is not one of the three supported ones, or
    extraction raises (the error is printed, not propagated).
    """
    if file_obj is None:
        return ""
    file_path = file_obj.name if hasattr(file_obj, 'name') else file_obj
    if not os.path.exists(file_path):
        return ""
    ext = os.path.splitext(file_path)[1].lower()
    text = ""
    try:
        if ext == ".pdf":
            reader = PdfReader(file_path)
            # Extract each page exactly once; pages yielding no text are skipped.
            page_texts = (page.extract_text() for page in reader.pages)
            text = "\n".join(t for t in page_texts if t)
        elif ext == ".docx":
            doc = docx.Document(file_path)
            text = "\n".join(p.text for p in doc.paragraphs)
        elif ext == ".txt":
            # Undecodable bytes are dropped rather than failing the whole file.
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                text = f.read()
    except Exception as e:
        # Best-effort: a broken document yields whatever was extracted so far.
        print(f"Extraction error: {e}")
    return text
def add_rule_manually(act, title, text, source):
    """Append a rule record to the rule store unless an identical text exists.

    The record is keyed by ``get_canonical_hash(text)``; a rule whose hash is
    already stored is rejected with a warning. Returns a status message for
    display in the UI.
    """
    # Blank text would otherwise be stored as a rule keyed by the hash of "".
    if not text or not text.strip():
        return "Error: Rule text is empty."
    data = load_data(DATA_FILE)
    det_id = get_canonical_hash(text)
    # .get tolerates stored records that lack the id key.
    if any(r.get('deterministic_id') == det_id for r in data):
        return f"Warning: Rule '{title}' already exists."
    new_rule = {
        "act": act,
        "section_title": title,
        "text": text,
        "source_url": source,
        "deterministic_id": det_id,
        "added_at": datetime.now().isoformat(),
    }
    data.append(new_rule)
    save_data(data, DATA_FILE)
    return f"Successfully added: {title}"
def process_rule_document(file_obj, act_name):
    """Extract the text of an uploaded document and store it as one rule.

    The file's base name is used both as the rule title and, prefixed with
    ``File: ``, as its source. Returns the status message from
    ``add_rule_manually`` or an error when no text could be extracted.
    """
    text = extract_text_from_any(file_obj)
    if not text.strip():
        return "Error: Could not extract text from rule document."
    # Accept a plain path string as well as an object exposing ``.name``,
    # matching what extract_text_from_any accepts.
    file_path = file_obj.name if hasattr(file_obj, 'name') else file_obj
    file_name = os.path.basename(file_path)
    return add_rule_manually(act_name, file_name, text, f"File: {file_name}")
def generate_tts(text):
    """Render ``text`` to an MP3 file via gTTS and return the file name.

    Returns ``None`` for missing text or text shorter than five characters,
    and when synthesis fails (the error is printed). The markdown characters
    ``#``, ``*`` and a backtick are removed and only the first 1500
    characters are spoken.
    """
    too_short = not text or len(text) < 5
    if too_short:
        return None
    try:
        spoken = text.translate(str.maketrans("", "", "#*`"))
        speech = gTTS(text=spoken[:1500], lang='en')
        stamp = int(datetime.now().timestamp())
        filename = f"ruling_audio_{stamp}.mp3"
        speech.save(filename)
        return filename
    except Exception as e:
        print(f"TTS Error: {e}")
        return None
def _chat_completions_url(endpoint):
    """Build the chat-completions URL from a base endpoint given with or without a trailing ``/v1``."""
    base = endpoint.strip().rstrip('/')
    if base.endswith('/v1'):
        base = base[:-len('/v1')]
    return f"{base}/v1/chat/completions"


def rule_on_issue(issue_description, issue_file, llm_endpoint, llm_key, llm_model):
    """Produce a ruling for an issue, log it, and render it as audio.

    The issue is the typed description plus any text extracted from
    ``issue_file``. The first 20 stored rules (600 characters each) are sent
    as context to the chat-completions endpoint when both ``llm_endpoint``
    and ``llm_key`` are set; otherwise a placeholder ruling is used.

    Returns a ``(ruling_text, audit_status, audio_file)`` tuple. When there is
    no issue or no stored rule, the first element is an error message, the
    status is ``""`` and the audio is ``None``.
    """
    file_text = extract_text_from_any(issue_file)
    description = (issue_description or "").strip()
    # Add the document marker only when a document actually yielded text;
    # otherwise the emptiness check below could never trigger.
    parts = []
    if description:
        parts.append(description)
    if file_text.strip():
        parts.append(f"[EXTRACTED FROM DOCUMENT]:\n{file_text}")
    combined_issue = "\n\n".join(parts).strip()
    if not combined_issue:
        return "Error: No issue description or document provided.", "", None

    rules = load_data(DATA_FILE)
    if not rules:
        return "Error: Dataset is empty. Add rules (CRA, FCA, etc.) first.", "", None

    # The prompt asks the model to cite Deterministic IDs, so they must be
    # part of the context it sees.
    context = "\n".join(
        f"[{r['act']} - {r['section_title']} | ID: {r.get('deterministic_id', 'n/a')}]: {r['text'][:600]}..."
        for r in rules[:20]
    )
    prompt = f"You are a Regulatory Ruling Engine.\nISSUE TO ANALYZE:\n{combined_issue}\nDETERMINISTIC REFERENCE RULES:\n{context}\nTASK:\n1. Identify applicable rules.\n2. Provide a 'Formal Ruling' (Compliant/Non-Compliant/Warning).\n3. Cite Rule Titles and Deterministic IDs.\n4. Provide forensic justification."

    ruling_text = "LLM Inference required for automated ruling."
    if llm_endpoint and llm_key:
        try:
            headers = {"Authorization": f"Bearer {llm_key}", "Content-Type": "application/json"}
            payload = {"model": llm_model, "messages": [{"role": "user", "content": prompt}], "temperature": 0}
            response = requests.post(_chat_completions_url(llm_endpoint), json=payload, headers=headers, timeout=45)
            # Surface HTTP failures as such instead of a KeyError on the body.
            response.raise_for_status()
            ruling_text = response.json()['choices'][0]['message']['content']
        except Exception as e:
            ruling_text = f"Inference Error: {str(e)}"

    audio_file = generate_tts(ruling_text)
    rulings_log = load_data(RULINGS_FILE)
    new_ruling = {
        "timestamp": datetime.now().isoformat(),
        "issue_summary": combined_issue[:200],
        "ruling": ruling_text,
        "ruling_hash": hashlib.sha256(ruling_text.encode()).hexdigest(),
    }
    rulings_log.append(new_ruling)
    save_data(rulings_log, RULINGS_FILE)
    return ruling_text, f"Ruling Logged (ID: {new_ruling['ruling_hash'][:8]})", audio_file
def sync_all(token, dataset_id):
    """Upload the local rule and ruling stores to a Hugging Face dataset repo.

    Each store is uploaded only if it exists locally, so a missing rule file
    no longer prevents the rulings log from being synced. Returns a status
    message; upload failures are reported in the message, not raised.
    """
    if not token or not dataset_id:
        return "Error: Missing credentials."
    local_files = [path for path in (DATA_FILE, RULINGS_FILE) if os.path.exists(path)]
    if not local_files:
        return "Error: No local data files to sync."
    api = HfApi()
    try:
        for path in local_files:
            api.upload_file(path_or_fileobj=path, path_in_repo=path, repo_id=dataset_id, repo_type="dataset", token=token)
        return "Full Sync Successful."
    except Exception as e:
        return f"Sync Failed: {str(e)}"
def view_stats():
    """Return one ``act: count`` line per act in the rule store, or ``"Empty"`` when it holds no rules."""
    rules = load_data(DATA_FILE)
    if not rules:
        return "Empty"
    counts = {}
    for rule in rules:
        act = rule['act']
        if act in counts:
            counts[act] += 1
        else:
            counts[act] = 1
    lines = [f"{act}: {count}" for act, count in counts.items()]
    return "\n".join(lines)
# --- GRADIO UI ---
# Markdown shown on the "User Manual" tab.
# NOTE(review): the emoji below were restored from mis-decoded text; the exact
# glyphs are best guesses — confirm against the original file.
USER_MANUAL_MD = """
# ⚖️ Legislation & FCA Ruling Manager: User Manual
## 🛠️ Building Your Rulebase
1. **Manual Entry**: Go to **Manage Rules** to paste specific sections from acts.
2. **Document Ingestion**: Upload **PDF, DOCX, or TXT** files of internal policies or acts.
3. **FCA Automation**: Use the **FCA Guidelines** tab to pull the **12 Principles for Businesses (PRIN)**.
## 🏛️ Ruling on Issues
1. **Describing the Issue**: Go to **Rule on Issues**. Upload an **Issue Document** or type a description.
2. **Generating the Ruling**: Ensure your **Inference Settings** are configured, then click **Generate Formal Ruling**.
3. **Audible Delivery (TTS)**: Click play on the **Audio Player** to hear the ruling read aloud.
## ☁️ Sync & Audit
- Go to **Sync & Audit**, enter your **HF Write Token** and **Dataset ID**, and click **Sync Everything**.
"""
# Gradio front end: five tabs, wired to the core functions defined earlier in this file.
# NOTE(review): the emoji in the labels were restored from mis-decoded text and
# the layout nesting was reconstructed from a paste that had lost its
# indentation — confirm both against the original file.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ⚖️ Legislation & FCA Ruling Manager")

    with gr.Tab("🏛️ Rule on Issues"):
        gr.Markdown("### 📄 Issue Document Intake")
        with gr.Row():
            with gr.Column():
                issue_file = gr.File(label="Upload Issue Document (PDF/DOCX/TXT)", file_count="single")
                issue_desc = gr.TextArea(label="Additional Context / Issue Description")
                with gr.Accordion("⚙️ Inference Settings", open=False):
                    end = gr.Textbox(label="Endpoint", value="http://localhost:11434/v1")
                    key = gr.Textbox(label="Key", type="password")
                    mod = gr.Textbox(label="Model", value="llama3")
                rule_btn = gr.Button("Generate Formal Ruling", variant="primary")
            with gr.Column():
                ruling_out = gr.Markdown(label="Official Ruling")
                audio_out = gr.Audio(label="Audible Ruling", interactive=False)
                log_status = gr.Textbox(label="Audit Status")

    with gr.Tab("➕ Manage Rules"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("#### 📁 Add Legislation/Handbook via File")
                doc_act = gr.Textbox(label="Act/Source Name")
                doc_in = gr.File(label="Upload Rule Document")
                doc_btn = gr.Button("Process Rule")
                gr.Markdown("---")
                gr.Markdown("#### ✍️ Manual Rule Entry")
                m_act = gr.Textbox(label="Act")
                m_tit = gr.Textbox(label="Title")
                m_src = gr.Textbox(label="Source")
                m_txt = gr.TextArea(label="Text")
                m_btn = gr.Button("Add Rule")
            with gr.Column():
                op_status = gr.Textbox(label="Status")
                # Inventory is computed once at build time; the button re-reads it.
                stats_out = gr.Textbox(label="Dataset Inventory", value=view_stats())
                refresh_btn = gr.Button("Refresh Inventory")

    with gr.Tab("🏦 FCA Guidelines"):
        gr.Markdown("### 🛠️ FCA Handbook Automation")
        # TODO(review): fca_btn has no click handler and nothing writes to
        # fca_status, so this tab currently does nothing when clicked.
        fca_btn = gr.Button("Ingest FCA PRIN Principles", variant="secondary")
        fca_status = gr.Textbox(label="FCA Ingestion Status")

    with gr.Tab("☁️ Sync & Audit"):
        hf_t = gr.Textbox(label="HF Token", type="password")
        hf_d = gr.Textbox(label="Dataset ID")
        s_btn = gr.Button("Sync Everything to Dataset")
        s_out = gr.Textbox(label="Sync Status")

    with gr.Tab("📖 User Manual"):
        gr.Markdown(USER_MANUAL_MD)

    # Event wiring: output order must match each function's return tuple.
    rule_btn.click(fn=rule_on_issue, inputs=[issue_desc, issue_file, end, key, mod], outputs=[ruling_out, log_status, audio_out])
    doc_btn.click(fn=process_rule_document, inputs=[doc_in, doc_act], outputs=op_status)
    m_btn.click(fn=add_rule_manually, inputs=[m_act, m_tit, m_txt, m_src], outputs=op_status)
    refresh_btn.click(fn=view_stats, outputs=stats_out)
    s_btn.click(fn=sync_all, inputs=[hf_t, hf_d], outputs=s_out)

if __name__ == "__main__":
    demo.launch()