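# app.py -- the lines below were Hugging Face file-viewer residue, kept as a
# comment so that the module parses as Python:
#   Uploaded by Basementup, commit message "Update app.py",
#   commit 0cea02d (verified), file size 9.37 kB.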
import gradio as gr
import json
import hashlib
import os
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from huggingface_hub import HfApi
from pypdf import PdfReader
import docx
import traceback
from gtts import gTTS
# JSON file holding the list of rule records (path is relative to the
# current working directory).
DATA_FILE = "legislation_rules.json"
# JSON file holding the log of generated rulings (path is relative to the
# current working directory).
RULINGS_FILE = "rulings_log.json"
# --- CORE LOGIC ---
def load_data(file_path):
    """Load a JSON document from ``file_path``.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON value, or an empty list when the file does not
        exist or does not contain valid JSON.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        return []
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses; a corrupt file is treated as "no data" rather than
        # crashing the app, while unrelated errors still propagate.
        return []
def save_data(data, file_path):
    """Write ``data`` to ``file_path`` as indented JSON.

    Args:
        data: A JSON-serializable value.
        file_path: Destination path; an existing file is overwritten.

    Raises:
        TypeError: If ``data`` is not JSON-serializable. The existing file
            is left untouched in that case.
    """
    # Serialize before opening: opening with 'w' truncates the file, so a
    # serialization failure must happen first to avoid wiping stored data.
    serialized = json.dumps(data, indent=2)
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(serialized)
def get_canonical_hash(text):
    """Return the SHA-256 hex digest of ``text`` after trimming whitespace.

    Leading and trailing whitespace is removed and the remainder is
    encoded as UTF-8 before hashing, so texts that differ only in
    surrounding whitespace share the same identifier.
    """
    normalized = text.strip()
    hasher = hashlib.sha256()
    hasher.update(normalized.encode('utf-8'))
    return hasher.hexdigest()
def extract_text_from_any(file_obj):
    """Extract plain text from a PDF, DOCX or TXT file.

    Args:
        file_obj: Either a filesystem path or an object exposing the path
            through a ``name`` attribute. May be ``None``.

    Returns:
        The extracted text. An empty string is returned when no file is
        given, the path does not exist, the extension is not one of
        ``.pdf`` / ``.docx`` / ``.txt``, or extraction fails.
    """
    if file_obj is None:
        return ""
    file_path = file_obj.name if hasattr(file_obj, 'name') else file_obj
    if not os.path.exists(file_path):
        return ""
    ext = os.path.splitext(file_path)[1].lower()
    text = ""
    try:
        if ext == ".pdf":
            reader = PdfReader(file_path)
            # Extract each page exactly once; pages without text are skipped.
            page_texts = (page.extract_text() for page in reader.pages)
            text = "\n".join(t for t in page_texts if t)
        elif ext == ".docx":
            doc = docx.Document(file_path)
            text = "\n".join(p.text for p in doc.paragraphs)
        elif ext == ".txt":
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                text = f.read()
    except Exception as e:
        # Best effort: a parsing failure is reported on stdout and the
        # caller receives an empty string.
        print(f"Extraction error: {e}")
    return text
def add_rule_manually(act, title, text, source):
    """Append a rule to the rules dataset unless its text is already stored.

    The rule is identified by the canonical hash of ``text``; a rule whose
    hash matches an existing record is not added again.

    Args:
        act: Name of the act or source the rule belongs to.
        title: Section title of the rule.
        text: Full rule text.
        source: Source reference stored as ``source_url``.

    Returns:
        A status message describing whether the rule was added, rejected
        as empty, or skipped as a duplicate.
    """
    # Reject empty input: an empty rule carries no content and would make
    # every later empty submission report a duplicate.
    if not text or not text.strip():
        return "Error: Rule text is empty."
    data = load_data(DATA_FILE)
    det_id = get_canonical_hash(text)
    # .get() tolerates stored records that lack the identifier key.
    if any(r.get('deterministic_id') == det_id for r in data):
        return f"Warning: Rule '{title}' already exists."
    new_rule = {
        "act": act,
        "section_title": title,
        "text": text,
        "source_url": source,
        "deterministic_id": det_id,
        "added_at": datetime.now().isoformat(),
    }
    data.append(new_rule)
    save_data(data, DATA_FILE)
    return f"Successfully added: {title}"
def process_rule_document(file_obj, act_name):
    """Extract the text of an uploaded document and store it as a rule.

    Args:
        file_obj: Filesystem path, or an object exposing the path through
            a ``name`` attribute.
        act_name: Act/source name recorded on the new rule.

    Returns:
        The status message from ``add_rule_manually``, or an error message
        when no text could be extracted.
    """
    text = extract_text_from_any(file_obj)
    if not text:
        return "Error: Could not extract text from rule document."
    # Resolve the path the same way extract_text_from_any does, so a plain
    # path string works as well as an object with a ``name`` attribute.
    file_path = file_obj.name if hasattr(file_obj, 'name') else file_obj
    file_name = os.path.basename(file_path)
    return add_rule_manually(act_name, file_name, text, f"File: {file_name}")
def generate_tts(text):
    """Render ``text`` to an MP3 file with gTTS and return the file name.

    Markdown markers (``#``, ``*`` and backticks) are removed and only the
    first 1500 characters are spoken. The file is written to the current
    working directory under a name derived from the current timestamp.

    Returns:
        The MP3 file name, or ``None`` when ``text`` is empty, shorter
        than five characters, or synthesis fails.
    """
    too_short = not text or len(text) < 5
    if too_short:
        return None
    try:
        markdown_marks = str.maketrans("", "", "#*`")
        spoken = text.translate(markdown_marks)
        speech = gTTS(text=spoken[:1500], lang='en')
        stamp = int(datetime.now().timestamp())
        out_name = f"ruling_audio_{stamp}.mp3"
        speech.save(out_name)
        return out_name
    except Exception as e:
        print(f"TTS Error: {e}")
        return None
def _chat_completions_url(llm_endpoint):
    """Build the chat-completions URL, tolerating a base URL ending in /v1."""
    base = llm_endpoint.strip().rstrip('/')
    if base.endswith('/v1'):
        # Avoid producing ".../v1/v1/chat/completions".
        base = base[:-len('/v1')]
    return f"{base}/v1/chat/completions"


def rule_on_issue(issue_description, issue_file, llm_endpoint, llm_key, llm_model):
    """Generate, log and voice a ruling for the described issue.

    The issue description and any text extracted from ``issue_file`` are
    combined, the first 20 stored rules (600 characters each) are added as
    context, and the prompt is sent to a chat-completions endpoint when
    both an endpoint and a key are supplied. The ruling is appended to the
    rulings log and passed to ``generate_tts``.

    Args:
        issue_description: Free-text description of the issue (may be empty).
        issue_file: Optional uploaded document accepted by
            ``extract_text_from_any``.
        llm_endpoint: Base URL of the inference server, with or without a
            trailing ``/v1``.
        llm_key: Bearer token sent in the Authorization header.
        llm_model: Model name placed in the request payload.

    Returns:
        A 3-tuple ``(ruling_text, audit_status, audio_file)``. On input
        errors the first element is an error message, the second is an
        empty string and the third is ``None``.
    """
    description = issue_description or ""
    file_text = extract_text_from_any(issue_file)
    # Validate the raw inputs: the combined text always contains the
    # "[EXTRACTED FROM DOCUMENT]:" header, so it is never empty by itself.
    if not description.strip() and not file_text.strip():
        return "Error: No issue description or document provided.", "", None
    combined_issue = f"{description}\n\n[EXTRACTED FROM DOCUMENT]:\n{file_text}".strip()

    rules = load_data(DATA_FILE)
    if not rules:
        return "Error: Dataset is empty. Add rules (CRA, FCA, etc.) first.", "", None

    # Only the first 20 rules, truncated to 600 characters, go in the prompt.
    context = "\n".join(
        f"[{r['act']} - {r['section_title']}]: {r['text'][:600]}..." for r in rules[:20]
    )
    prompt = (
        "You are a Regulatory Ruling Engine.\n"
        f"ISSUE TO ANALYZE:\n{combined_issue}\n"
        f"DETERMINISTIC REFERENCE RULES:\n{context}\n"
        "TASK:\n"
        "1. Identify applicable rules.\n"
        "2. Provide a 'Formal Ruling' (Compliant/Non-Compliant/Warning).\n"
        "3. Cite Rule Titles and Deterministic IDs.\n"
        "4. Provide forensic justification."
    )

    ruling_text = "LLM Inference required for automated ruling."
    if llm_endpoint and llm_key:
        try:
            headers = {"Authorization": f"Bearer {llm_key}", "Content-Type": "application/json"}
            payload = {
                "model": llm_model,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0,
            }
            response = requests.post(
                _chat_completions_url(llm_endpoint), json=payload, headers=headers, timeout=45
            )
            # Surface HTTP failures as such instead of a KeyError on 'choices'.
            response.raise_for_status()
            ruling_text = response.json()['choices'][0]['message']['content']
        except Exception as e:
            ruling_text = f"Inference Error: {str(e)}"

    audio_file = generate_tts(ruling_text)

    rulings_log = load_data(RULINGS_FILE)
    new_ruling = {
        "timestamp": datetime.now().isoformat(),
        "issue_summary": combined_issue[:200],
        "ruling": ruling_text,
        "ruling_hash": hashlib.sha256(ruling_text.encode()).hexdigest(),
    }
    rulings_log.append(new_ruling)
    save_data(rulings_log, RULINGS_FILE)
    return ruling_text, f"Ruling Logged (ID: {new_ruling['ruling_hash'][:8]})", audio_file
def sync_all(token, dataset_id):
    """Upload the local rules file and rulings log to a Hugging Face dataset.

    Each file is uploaded only if it exists locally, so a missing rules
    file no longer prevents the rulings log from being uploaded.

    Args:
        token: Hugging Face token used for the upload.
        dataset_id: Target dataset repository id.

    Returns:
        A status message: an error when credentials are missing, a
        failure message when an upload raises or no local file exists,
        otherwise a success message.
    """
    if not token or not dataset_id:
        return "Error: Missing credentials."
    api = HfApi()
    uploaded = 0
    try:
        for local_file in (DATA_FILE, RULINGS_FILE):
            if not os.path.exists(local_file):
                continue
            api.upload_file(
                path_or_fileobj=local_file,
                path_in_repo=local_file,
                repo_id=dataset_id,
                repo_type="dataset",
                token=token,
            )
            uploaded += 1
    except Exception as e:
        return f"Sync Failed: {str(e)}"
    if not uploaded:
        return "Sync Failed: No local data files to upload."
    return "Full Sync Successful."
def view_stats():
    """Summarise the rules dataset as one ``act: count`` line per act.

    Returns:
        ``"Empty"`` when no rules are stored; otherwise the acts in order
        of first appearance, each followed by its number of rules.
    """
    rules = load_data(DATA_FILE)
    if not rules:
        return "Empty"
    tally = {}
    for rule in rules:
        act_name = rule['act']
        tally[act_name] = tally[act_name] + 1 if act_name in tally else 1
    lines = []
    for act_name, count in tally.items():
        lines.append(f"{act_name}: {count}")
    return "\n".join(lines)
# --- GRADIO UI ---
# Markdown shown in the "User Manual" tab. Emoji that had been corrupted by
# a wrong-codepage decode are restored to the intended characters.
USER_MANUAL_MD = """
# ⚖️ Legislation & FCA Ruling Manager: User Manual
## 🛠️ Building Your Rulebase
1. **Manual Entry**: Go to **Manage Rules** to paste specific sections from acts.
2. **Document Ingestion**: Upload **PDF, DOCX, or TXT** files of internal policies or acts.
3. **FCA Automation**: Use the **FCA Guidelines** tab to pull the **12 Principles for Businesses (PRIN)**.
## 🏛️ Ruling on Issues
1. **Describing the Issue**: Go to **Rule on Issues**. Upload an **Issue Document** or type a description.
2. **Generating the Ruling**: Ensure your **Inference Settings** are configured, then click **Generate Formal Ruling**.
3. **Audible Delivery (TTS)**: Click play on the **Audio Player** to hear the ruling read aloud.
## ☁️ Sync & Audit
- Go to **Sync & Audit**, enter your **HF Write Token** and **Dataset ID**, and click **Sync Everything**.
"""
# Gradio interface: five tabs (ruling, rule management, FCA guidelines,
# sync, manual) followed by the event wiring. Emoji in labels that had been
# corrupted by a wrong-codepage decode are restored.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ⚖️ Legislation & FCA Ruling Manager")

    with gr.Tab("🏛️ Rule on Issues"):
        gr.Markdown("### 📂 Issue Document Intake")
        with gr.Row():
            # Left column: inputs and inference settings.
            with gr.Column():
                issue_file = gr.File(label="Upload Issue Document (PDF/DOCX/TXT)", file_count="single")
                issue_desc = gr.TextArea(label="Additional Context / Issue Description")
                with gr.Accordion("⚙️ Inference Settings", open=False):
                    end = gr.Textbox(label="Endpoint", value="http://localhost:11434/v1")
                    key = gr.Textbox(label="Key", type="password")
                    mod = gr.Textbox(label="Model", value="llama3")
                rule_btn = gr.Button("Generate Formal Ruling", variant="primary")
            # Right column: ruling text, audio rendering and audit status.
            with gr.Column():
                ruling_out = gr.Markdown(label="Official Ruling")
                audio_out = gr.Audio(label="Audible Ruling", interactive=False)
                log_status = gr.Textbox(label="Audit Status")

    with gr.Tab("➕ Manage Rules"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("#### 📂 Add Legislation/Handbook via File")
                doc_act = gr.Textbox(label="Act/Source Name")
                doc_in = gr.File(label="Upload Rule Document")
                doc_btn = gr.Button("Process Rule")
                gr.Markdown("---")
                gr.Markdown("#### ✍️ Manual Rule Entry")
                m_act = gr.Textbox(label="Act")
                m_tit = gr.Textbox(label="Title")
                m_src = gr.Textbox(label="Source")
                m_txt = gr.TextArea(label="Text")
                m_btn = gr.Button("Add Rule")
            with gr.Column():
                op_status = gr.Textbox(label="Status")
                # view_stats() runs once here, while the UI is being built;
                # the Refresh button re-runs it on demand.
                stats_out = gr.Textbox(label="Dataset Inventory", value=view_stats())
                refresh_btn = gr.Button("Refresh Inventory")

    with gr.Tab("🏦 FCA Guidelines"):
        gr.Markdown("### 🛠️ FCA Handbook Automation")
        # NOTE(review): no click handler is registered for fca_btn anywhere
        # in this file, so the button currently does nothing -- confirm
        # whether an ingestion function is missing.
        fca_btn = gr.Button("Ingest FCA PRIN Principles", variant="secondary")
        fca_status = gr.Textbox(label="FCA Ingestion Status")

    with gr.Tab("☁️ Sync & Audit"):
        hf_t = gr.Textbox(label="HF Token", type="password")
        hf_d = gr.Textbox(label="Dataset ID")
        s_btn = gr.Button("Sync Everything to Dataset")
        s_out = gr.Textbox(label="Sync Status")

    with gr.Tab("📖 User Manual"):
        gr.Markdown(USER_MANUAL_MD)

    # Event wiring; output order matches each function's return order.
    rule_btn.click(
        fn=rule_on_issue,
        inputs=[issue_desc, issue_file, end, key, mod],
        outputs=[ruling_out, log_status, audio_out],
    )
    doc_btn.click(fn=process_rule_document, inputs=[doc_in, doc_act], outputs=op_status)
    m_btn.click(fn=add_rule_manually, inputs=[m_act, m_tit, m_txt, m_src], outputs=op_status)
    refresh_btn.click(fn=view_stats, outputs=stats_out)
    s_btn.click(fn=sync_all, inputs=[hf_t, hf_d], outputs=s_out)

if __name__ == "__main__":
    demo.launch()