import gradio as gr
import json
import hashlib
import os
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from huggingface_hub import HfApi
from pypdf import PdfReader
import docx
import traceback
from gtts import gTTS
# JSON file holding the rule records; read and written by the rule-management
# functions below and uploaded by sync_all.
DATA_FILE = "legislation_rules.json"
# JSON file holding the log of generated rulings; appended to by rule_on_issue.
RULINGS_FILE = "rulings_log.json"
# --- CORE LOGIC ---
def load_data(file_path):
    """Load the JSON document stored at ``file_path``.

    This is a best-effort loader: a missing, unreadable, or corrupt file
    yields an empty list instead of raising, so callers can always append
    to or iterate over the result.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON content, or ``[]`` when the file does not exist,
        cannot be read, or does not contain valid JSON.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        # A file that has not been created yet simply means "no data".
        return []
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        # Only these are caught (not a bare except) so that
        # KeyboardInterrupt/SystemExit still propagate.
        print(f"Load error for {file_path}: {e}")
        return []
def save_data(data, file_path):
    """Write ``data`` to ``file_path`` as indented JSON.

    Args:
        data: JSON-serializable object to persist.
        file_path: Destination path; an existing file is overwritten.

    Raises:
        TypeError: If ``data`` contains a value that JSON cannot encode.
            The existing file is left untouched in that case.
    """
    # Serialize before opening: opening in 'w' mode truncates the file, so a
    # serialization failure after that point would destroy the stored data.
    serialized = json.dumps(data, indent=2)
    with open(file_path, 'w') as f:
        f.write(serialized)
def get_canonical_hash(text):
    """Return the SHA-256 hex digest of ``text`` with outer whitespace removed.

    Args:
        text: String to fingerprint.

    Returns:
        A 64-character lowercase hexadecimal digest of the UTF-8 encoding of
        the stripped text.
    """
    trimmed = text.strip()
    hasher = hashlib.sha256()
    hasher.update(trimmed.encode('utf-8'))
    return hasher.hexdigest()
def extract_text_from_any(file_obj):
    """Extract plain text from an uploaded PDF, DOCX, or TXT file.

    Args:
        file_obj: Either a path string or an object exposing the path as a
            ``name`` attribute. ``None`` is accepted.

    Returns:
        The extracted text. An empty string is returned when ``file_obj`` is
        ``None``, the path does not exist, the extension is not one of
        ``.pdf``/``.docx``/``.txt``, or extraction fails (the error is
        printed, not raised).
    """
    if file_obj is None:
        return ""
    file_path = file_obj.name if hasattr(file_obj, 'name') else file_obj
    if not os.path.exists(file_path):
        return ""
    ext = os.path.splitext(file_path)[1].lower()
    text = ""
    try:
        if ext == ".pdf":
            reader = PdfReader(file_path)
            # Extract each page exactly once; the generator feeds the filter
            # so pages that yield no text are skipped without re-extracting.
            page_texts = (page.extract_text() for page in reader.pages)
            text = "\n".join(page_text for page_text in page_texts if page_text)
        elif ext == ".docx":
            doc = docx.Document(file_path)
            text = "\n".join([p.text for p in doc.paragraphs])
        elif ext == ".txt":
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                text = f.read()
    except Exception as e:
        # Deliberate best-effort: a broken document must not crash the UI.
        print(f"Extraction error: {e}")
    return text
def add_rule_manually(act, title, text, source):
    """Add one rule to the rule dataset unless an identical text already exists.

    The rule is identified by the hash of its text, so two rules with the
    same text are treated as duplicates regardless of act or title.

    Args:
        act: Name of the act or source the rule belongs to.
        title: Section title of the rule.
        text: Full text of the rule; must not be empty.
        source: Source URL or description stored with the rule.

    Returns:
        A human-readable status string (success, duplicate warning, or error).
    """
    # Reject empty input up front: hashing an empty string would store a
    # meaningless rule, and None would crash inside the hash helper.
    if not text or not text.strip():
        return "Error: Rule text is empty."
    data = load_data(DATA_FILE)
    det_id = get_canonical_hash(text)
    # .get() tolerates records that lack the id field instead of raising.
    if any(r.get('deterministic_id') == det_id for r in data):
        return f"Warning: Rule '{title}' already exists."
    new_rule = {
        "act": act,
        "section_title": title,
        "text": text,
        "source_url": source,
        "deterministic_id": det_id,
        "added_at": datetime.now().isoformat(),
    }
    data.append(new_rule)
    save_data(data, DATA_FILE)
    return f"Successfully added: {title}"
def process_rule_document(file_obj, act_name):
    """Extract the text of an uploaded document and store it as one rule.

    Args:
        file_obj: Uploaded file, given either as a path string or as an
            object exposing the path as a ``name`` attribute.
        act_name: Name of the act or source to file the rule under.

    Returns:
        A human-readable status string from the add step, or an error
        message when no text could be extracted.
    """
    if file_obj is None:
        return "Error: Could not extract text from rule document."
    text = extract_text_from_any(file_obj)
    if not text:
        return "Error: Could not extract text from rule document."
    # Resolve the path the same way extract_text_from_any does, so a plain
    # path string (which has no .name attribute) does not raise.
    file_path = file_obj.name if hasattr(file_obj, 'name') else file_obj
    base_name = os.path.basename(file_path)
    return add_rule_manually(act_name, base_name, text, f"File: {base_name}")
def generate_tts(text):
    """Render ``text`` to an MP3 file and return the file name.

    Markdown markers (``#``, ``*`` and backticks) are removed first and only
    the first 1500 characters are spoken.

    Args:
        text: Text to read aloud.

    Returns:
        The name of the MP3 file written to the working directory, or
        ``None`` when the text is missing, shorter than 5 characters, empty
        after cleaning, or when synthesis fails (the error is printed).
    """
    import uuid

    if not text or len(text) < 5:
        return None
    try:
        clean_text = text.replace("#", "").replace("*", "").replace("`", "")
        if not clean_text.strip():
            # Nothing speakable is left once the markdown markers are gone.
            return None
        tts = gTTS(text=clean_text[:1500], lang='en')
        # The timestamp alone has one-second resolution; the random suffix
        # keeps two rulings generated in the same second from overwriting
        # each other's audio file.
        stamp = int(datetime.now().timestamp())
        filename = f"ruling_audio_{stamp}_{uuid.uuid4().hex[:8]}.mp3"
        tts.save(filename)
        return filename
    except Exception as e:
        # Audio is optional; a synthesis failure must not block the ruling.
        print(f"TTS Error: {e}")
        return None
def _chat_completions_url(endpoint):
    """Build the chat-completions URL without duplicating a trailing /v1."""
    base = endpoint.strip().rstrip('/')
    if base.endswith('/chat/completions'):
        return base
    if base.endswith('/v1'):
        return f"{base}/chat/completions"
    return f"{base}/v1/chat/completions"


def rule_on_issue(issue_description, issue_file, llm_endpoint, llm_key, llm_model):
    """Produce a ruling on an issue using the stored rules and an LLM endpoint.

    The issue text is the typed description plus any text extracted from the
    uploaded document. Up to 20 stored rules (600 characters each) are placed
    in the prompt. The result is read aloud via TTS and appended to the
    rulings log.

    Args:
        issue_description: Free-text description of the issue; may be empty.
        issue_file: Optional uploaded issue document.
        llm_endpoint: Base URL of the chat-completions service, with or
            without a trailing ``/v1``.
        llm_key: Bearer token for the service. Inference is skipped when
            either the endpoint or the key is empty.
        llm_model: Model name sent in the request payload.

    Returns:
        A tuple ``(ruling_text, audit_status, audio_file)``. On a validation
        error the first element is the error message, the second is ``""``
        and the third is ``None``.
    """
    description = issue_description or ""
    # Skip extraction entirely when nothing was uploaded.
    file_text = extract_text_from_any(issue_file) if issue_file is not None else ""
    file_text = file_text or ""
    # Validate the raw inputs: once the document marker is added the
    # combined string is never empty, so checking it would never trigger.
    if not description.strip() and not file_text.strip():
        return "Error: No issue description or document provided.", "", None
    if file_text.strip():
        combined_issue = f"{description}\n\n[EXTRACTED FROM DOCUMENT]:\n{file_text}".strip()
    else:
        combined_issue = description.strip()
    rules = load_data(DATA_FILE)
    if not rules:
        return "Error: Dataset is empty. Add rules (CRA, FCA, etc.) first.", "", None
    context = "\n".join([f"[{r['act']} - {r['section_title']}]: {r['text'][:600]}..." for r in rules[:20]])
    prompt = f"You are a Regulatory Ruling Engine.\nISSUE TO ANALYZE:\n{combined_issue}\nDETERMINISTIC REFERENCE RULES:\n{context}\nTASK:\n1. Identify applicable rules.\n2. Provide a 'Formal Ruling' (Compliant/Non-Compliant/Warning).\n3. Cite Rule Titles and Deterministic IDs.\n4. Provide forensic justification."
    ruling_text = "LLM Inference required for automated ruling."
    if llm_endpoint and llm_key:
        try:
            headers = {"Authorization": f"Bearer {llm_key}", "Content-Type": "application/json"}
            payload = {"model": llm_model, "messages": [{"role": "user", "content": prompt}], "temperature": 0}
            response = requests.post(_chat_completions_url(llm_endpoint), json=payload, headers=headers, timeout=45)
            # Surface HTTP errors directly instead of a confusing KeyError
            # when the error body has no 'choices' field.
            response.raise_for_status()
            ruling_text = response.json()['choices'][0]['message']['content']
        except Exception as e:
            ruling_text = f"Inference Error: {str(e)}"
    audio_file = generate_tts(ruling_text)
    rulings_log = load_data(RULINGS_FILE)
    new_ruling = {
        "timestamp": datetime.now().isoformat(),
        "issue_summary": combined_issue[:200],
        "ruling": ruling_text,
        "ruling_hash": hashlib.sha256(ruling_text.encode()).hexdigest(),
    }
    rulings_log.append(new_ruling)
    save_data(rulings_log, RULINGS_FILE)
    return ruling_text, f"Ruling Logged (ID: {new_ruling['ruling_hash'][:8]})", audio_file
def sync_all(token, dataset_id):
    """Upload the local rule and ruling files to a Hugging Face dataset repo.

    Args:
        token: Hugging Face token passed to each upload.
        dataset_id: Target dataset repository id.

    Returns:
        A human-readable status string: success, a failure message carrying
        the exception text, or an error when credentials or local files are
        missing.
    """
    if not token or not dataset_id:
        return "Error: Missing credentials."
    # Treat both files the same way: upload whichever exist locally rather
    # than failing on a missing rules file while skipping a missing log.
    local_files = [path for path in (DATA_FILE, RULINGS_FILE) if os.path.exists(path)]
    if not local_files:
        return "Error: No local data files to sync."
    api = HfApi()
    try:
        for path in local_files:
            api.upload_file(path_or_fileobj=path, path_in_repo=path, repo_id=dataset_id, repo_type="dataset", token=token)
        return "Full Sync Successful."
    except Exception as e:
        return f"Sync Failed: {str(e)}"
def view_stats():
    """Summarize the rule dataset as one ``act: count`` line per act.

    Returns:
        ``"Empty"`` when no rules are stored; otherwise a newline-separated
        list of acts with the number of rules filed under each, in
        first-seen order.
    """
    records = load_data(DATA_FILE)
    if not records:
        return "Empty"
    per_act = {}
    for record in records:
        act_name = record['act']
        per_act.setdefault(act_name, 0)
        per_act[act_name] += 1
    summary_lines = []
    for act_name, count in per_act.items():
        summary_lines.append(f"{act_name}: {count}")
    return "\n".join(summary_lines)
# --- GRADIO UI ---
# Markdown source of the in-app user manual (rendered with gr.Markdown in the
# UI section below).
# NOTE(review): the symbols in front of the headings look like mis-encoded
# emoji; confirm the file's encoding against the original before editing them.
USER_MANUAL_MD = """
# βοΈ Legislation & FCA Ruling Manager: User Manual
## π οΈ Building Your Rulebase
1. **Manual Entry**: Go to **Manage Rules** to paste specific sections from acts.
2. **Document Ingestion**: Upload **PDF, DOCX, or TXT** files of internal policies or acts.
3. **FCA Automation**: Use the **FCA Guidelines** tab to pull the **12 Principles for Businesses (PRIN)**.
## ποΈ Ruling on Issues
1. **Describing the Issue**: Go to **Rule on Issues**. Upload an **Issue Document** or type a description.
2. **Generating the Ruling**: Ensure your **Inference Settings** are configured, then click **Generate Formal Ruling**.
3. **Audible Delivery (TTS)**: Click play on the **Audio Player** to hear the ruling read aloud.
## βοΈ Sync & Audit
- Go to **Sync & Audit**, enter your **HF Write Token** and **Dataset ID**, and click **Sync Everything**.
"""
# Build the Gradio interface: one tab per workflow, then wire the buttons to
# the functions defined above.
# NOTE(review): the nesting of components under Row/Column/Accordion below is
# inferred from statement order; confirm it against the intended layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# βοΈ Legislation & FCA Ruling Manager")
    # Tab 1: submit an issue (document and/or description) and get a ruling.
    with gr.Tab("ποΈ Rule on Issues"):
        gr.Markdown("### π Issue Document Intake")
        with gr.Row():
            with gr.Column():
                issue_file = gr.File(label="Upload Issue Document (PDF/DOCX/TXT)", file_count="single")
                issue_desc = gr.TextArea(label="Additional Context / Issue Description")
                with gr.Accordion("βοΈ Inference Settings", open=False):
                    end = gr.Textbox(label="Endpoint", value="http://localhost:11434/v1")
                    key = gr.Textbox(label="Key", type="password")
                    mod = gr.Textbox(label="Model", value="llama3")
                rule_btn = gr.Button("Generate Formal Ruling", variant="primary")
            with gr.Column():
                ruling_out = gr.Markdown(label="Official Ruling")
                audio_out = gr.Audio(label="Audible Ruling", interactive=False)
                log_status = gr.Textbox(label="Audit Status")
    # Tab 2: add rules from a file or by manual entry, and view the inventory.
    with gr.Tab("β Manage Rules"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("#### π Add Legislation/Handbook via File")
                doc_act = gr.Textbox(label="Act/Source Name")
                doc_in = gr.File(label="Upload Rule Document")
                doc_btn = gr.Button("Process Rule")
                gr.Markdown("---")
                gr.Markdown("#### βοΈ Manual Rule Entry")
                m_act = gr.Textbox(label="Act"); m_tit = gr.Textbox(label="Title"); m_src = gr.Textbox(label="Source"); m_txt = gr.TextArea(label="Text")
                m_btn = gr.Button("Add Rule")
            with gr.Column():
                op_status = gr.Textbox(label="Status")
                # view_stats() is called once here, while the UI is built;
                # the Refresh button below re-runs it on demand.
                stats_out = gr.Textbox(label="Dataset Inventory", value=view_stats())
                refresh_btn = gr.Button("Refresh Inventory")
    # Tab 3: FCA ingestion.
    # NOTE(review): no .click handler is registered for fca_btn in this block,
    # so the button appears to do nothing; confirm whether a handler exists
    # elsewhere.
    with gr.Tab("π¦ FCA Guidelines"):
        gr.Markdown("### π οΈ FCA Handbook Automation")
        fca_btn = gr.Button("Ingest FCA PRIN Principles", variant="secondary")
        fca_status = gr.Textbox(label="FCA Ingestion Status")
    # Tab 4: upload the local JSON files to a Hugging Face dataset.
    with gr.Tab("βοΈ Sync & Audit"):
        hf_t = gr.Textbox(label="HF Token", type="password"); hf_d = gr.Textbox(label="Dataset ID")
        s_btn = gr.Button("Sync Everything to Dataset")
        s_out = gr.Textbox(label="Sync Status")
    # Tab 5: static help text.
    with gr.Tab("π User Manual"):
        gr.Markdown(USER_MANUAL_MD)
    # Event wiring: each button calls one function defined above; the order of
    # `inputs` matches that function's parameter order.
    rule_btn.click(fn=rule_on_issue, inputs=[issue_desc, issue_file, end, key, mod], outputs=[ruling_out, log_status, audio_out])
    doc_btn.click(fn=process_rule_document, inputs=[doc_in, doc_act], outputs=op_status)
    # Inputs are ordered (act, title, text, source) to match add_rule_manually.
    m_btn.click(fn=add_rule_manually, inputs=[m_act, m_tit, m_txt, m_src], outputs=op_status)
    refresh_btn.click(fn=view_stats, outputs=stats_out)
    s_btn.click(fn=sync_all, inputs=[hf_t, hf_d], outputs=s_out)
# Start the Gradio server only when this file is run as a script, not when it
# is imported.
if __name__ == "__main__":
    demo.launch()