File size: 9,371 Bytes
951a8cb
 
 
 
 
 
 
 
0712aa0
 
ff5486a
3c3a958
951a8cb
142a4ac
 
951a8cb
0cea02d
 
142a4ac
 
 
951a8cb
 
 
 
 
 
142a4ac
 
951a8cb
 
 
 
 
142a4ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
951a8cb
142a4ac
951a8cb
 
142a4ac
951a8cb
 
 
 
 
 
 
 
 
142a4ac
 
 
 
 
 
 
951a8cb
3c3a958
0cea02d
3c3a958
 
0cea02d
3c3a958
 
 
 
 
 
 
142a4ac
 
 
0cea02d
142a4ac
0cea02d
142a4ac
0cea02d
142a4ac
 
 
 
 
 
 
0cea02d
3c3a958
142a4ac
0cea02d
142a4ac
 
3c3a958
951a8cb
142a4ac
 
951a8cb
 
142a4ac
 
 
 
 
951a8cb
142a4ac
 
 
951a8cb
142a4ac
 
951a8cb
0cea02d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
951a8cb
142a4ac
951a8cb
142a4ac
 
951a8cb
 
142a4ac
 
 
 
 
 
 
 
 
3c3a958
142a4ac
 
 
 
 
 
 
 
 
0712aa0
142a4ac
 
 
951a8cb
142a4ac
 
ff5486a
951a8cb
0cea02d
 
 
 
 
142a4ac
 
 
 
0cea02d
 
 
142a4ac
3c3a958
142a4ac
 
 
 
951a8cb
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
import gradio as gr
import json
import hashlib
import os
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from huggingface_hub import HfApi
from pypdf import PdfReader
import docx
import traceback
from gtts import gTTS

# JSON file (relative to the working directory) holding the list of rule records.
DATA_FILE = "legislation_rules.json"
# JSON file (relative to the working directory) holding the list of logged rulings.
RULINGS_FILE = "rulings_log.json"

# --- CORE LOGIC ---

def load_data(file_path):
    """Load a list of records from a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The parsed list. An empty list is returned when the file does not
        exist, is empty or not valid JSON, or holds a JSON value that is not
        a list (every caller appends to / iterates over the result).
    """
    if not os.path.exists(file_path):
        return []
    with open(file_path, 'r', encoding='utf-8') as f:
        try:
            loaded = json.load(f)
        except ValueError as e:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors;
            # catching only these keeps KeyboardInterrupt/SystemExit propagating.
            print(f"Load error ({file_path}): {e}")
            return []
    return loaded if isinstance(loaded, list) else []

def save_data(data, file_path):
    """Write ``data`` to ``file_path`` as indented JSON without risking the old file.

    The data is serialized before anything is opened, written to a sibling
    temporary file, and then moved over the target with ``os.replace``. A
    serialization error or an interrupted write therefore leaves the previous
    contents of ``file_path`` intact instead of a truncated file.

    Args:
        data: JSON-serializable object to store.
        file_path: Destination path.

    Raises:
        TypeError: If ``data`` is not JSON-serializable.
        OSError: If the file cannot be written or replaced.
    """
    serialized = json.dumps(data, indent=2)
    tmp_path = f"{file_path}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            f.write(serialized)
        os.replace(tmp_path, file_path)
    finally:
        # Only still present when the write or the replace failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

def get_canonical_hash(text):
    """Return the SHA-256 hex digest of ``text`` after trimming surrounding whitespace.

    The text is encoded as UTF-8 before hashing.
    """
    normalized = text.strip()
    hasher = hashlib.sha256()
    hasher.update(normalized.encode('utf-8'))
    return hasher.hexdigest()

def extract_text_from_any(file_obj):
    """Extract plain text from a PDF, DOCX, or TXT file.

    Args:
        file_obj: A filesystem path, an object exposing the path as ``.name``,
            or ``None``.

    Returns:
        The extracted text. An empty string is returned when ``file_obj`` is
        ``None``, the path does not exist, the extension is not one of
        ``.pdf`` / ``.docx`` / ``.txt`` (case-insensitive), or extraction
        raises (the error is printed, not propagated).
    """
    if file_obj is None:
        return ""
    file_path = file_obj.name if hasattr(file_obj, 'name') else file_obj
    if not os.path.exists(file_path):
        return ""
    ext = os.path.splitext(file_path)[1].lower()
    try:
        if ext == ".pdf":
            reader = PdfReader(file_path)
            # Extract each page exactly once; pages yielding no text are skipped.
            page_texts = (page.extract_text() for page in reader.pages)
            return "\n".join(page_text for page_text in page_texts if page_text)
        if ext == ".docx":
            doc = docx.Document(file_path)
            return "\n".join(p.text for p in doc.paragraphs)
        if ext == ".txt":
            # Undecodable bytes are dropped rather than failing the whole read.
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                return f.read()
    except Exception as e:
        # Best-effort: a broken document yields no text instead of an error.
        print(f"Extraction error: {e}")
    return ""

def add_rule_manually(act, title, text, source):
    """Append a rule to the rule file unless its text is blank or already stored.

    Rules are de-duplicated by the canonical hash of their text, so the same
    text under a different title is still treated as a duplicate.

    Args:
        act: Name of the act or source the rule belongs to.
        title: Section title of the rule.
        text: Full rule text; must contain non-whitespace characters.
        source: Source URL or description stored with the rule.

    Returns:
        A status message: an error for blank text, a warning for a duplicate,
        or a success confirmation.
    """
    # A blank rule carries no content and would occupy the single "empty text"
    # hash, so reject it before touching the rule file.
    if not text or not text.strip():
        return "Error: Rule text is empty."
    data = load_data(DATA_FILE)
    det_id = get_canonical_hash(text)
    # .get() tolerates stored records that lack a deterministic_id.
    if any(r.get('deterministic_id') == det_id for r in data):
        return f"Warning: Rule '{title}' already exists."
    new_rule = {
        "act": act,
        "section_title": title,
        "text": text,
        "source_url": source,
        "deterministic_id": det_id,
        "added_at": datetime.now().isoformat()
    }
    data.append(new_rule)
    save_data(data, DATA_FILE)
    return f"Successfully added: {title}"

def process_rule_document(file_obj, act_name):
    """Extract text from an uploaded rule document and store it as a rule.

    The file's base name is used as the rule title and, prefixed with
    ``File: ``, as its source.

    Args:
        file_obj: A filesystem path, an object exposing the path as ``.name``,
            or ``None`` when nothing was uploaded.
        act_name: Name of the act or source the document belongs to.

    Returns:
        The status message from ``add_rule_manually``, or an error message when
        no text could be extracted.
    """
    failure = "Error: Could not extract text from rule document."
    if file_obj is None:
        return failure
    text = extract_text_from_any(file_obj)
    if not text:
        return failure
    # Resolve the path the same way extract_text_from_any does, so a plain
    # path string works as well as an object with a .name attribute.
    file_path = file_obj.name if hasattr(file_obj, 'name') else file_obj
    file_name = os.path.basename(file_path)
    return add_rule_manually(act_name, file_name, text, f"File: {file_name}")

def generate_tts(text):
    """Render ``text`` to an MP3 file with gTTS and return the file name.

    Markdown markers (``#``, ``*`` and backticks) are removed and only the
    first 1500 characters of the cleaned text are spoken.

    Args:
        text: Text to read aloud.

    Returns:
        The name of the MP3 file written to the working directory, or ``None``
        when the text is missing, shorter than 5 characters, blank after
        cleaning, or when speech generation fails (the error is printed).
    """
    import uuid

    if not text or len(text) < 5:
        return None
    try:
        clean_text = text.translate(str.maketrans("", "", "#*`"))
        if not clean_text.strip():
            return None
        tts = gTTS(text=clean_text[:1500], lang='en')
        # The timestamp alone has one-second granularity; the random suffix
        # stops two rulings generated in the same second from sharing a file.
        filename = f"ruling_audio_{int(datetime.now().timestamp())}_{uuid.uuid4().hex[:8]}.mp3"
        tts.save(filename)
        return filename
    except Exception as e:
        print(f"TTS Error: {e}")
        return None

def _chat_completions_url(endpoint):
    """Build the chat-completions URL from a base URL that may already end in /v1."""
    base = endpoint.strip().rstrip('/')
    if base.endswith('/chat/completions'):
        return base
    if base.endswith('/v1'):
        # Avoid producing ".../v1/v1/chat/completions".
        base = base[:-len('/v1')]
    return f"{base}/v1/chat/completions"


def rule_on_issue(issue_description, issue_file, llm_endpoint, llm_key, llm_model):
    """Generate, log, and voice a ruling for an issue against the stored rules.

    The prompt contains the issue text plus up to the first 20 stored rules,
    each truncated to 600 characters. When both an endpoint and a key are
    given, the prompt is posted to the endpoint's chat-completions route;
    otherwise a placeholder ruling is used. The ruling is appended to the
    rulings log together with its SHA-256 hash.

    Args:
        issue_description: Free-text description of the issue (may be empty).
        issue_file: Optional uploaded document describing the issue.
        llm_endpoint: Base URL of the inference server, with or without a
            trailing ``/v1``.
        llm_key: Bearer token sent to the inference server.
        llm_model: Model name sent in the request payload.

    Returns:
        A tuple ``(ruling_text, status_message, audio_file)``. On missing
        input or an empty rule file the first element is an error message, the
        second is ``""`` and the third is ``None``.
    """
    description = issue_description or ""
    file_text = extract_text_from_any(issue_file) if issue_file is not None else ""
    # Check the raw inputs: the combined string below always contains the
    # document marker, so it can never be empty on its own.
    if not description.strip() and not file_text.strip():
        return "Error: No issue description or document provided.", "", None
    combined_issue = f"{description}\n\n[EXTRACTED FROM DOCUMENT]:\n{file_text}".strip()

    rules = load_data(DATA_FILE)
    if not rules:
        return "Error: Dataset is empty. Add rules (CRA, FCA, etc.) first.", "", None

    context = "\n".join(
        f"[{r['act']} - {r['section_title']}]: {r['text'][:600]}..." for r in rules[:20]
    )
    prompt = (
        "You are a Regulatory Ruling Engine.\n"
        f"ISSUE TO ANALYZE:\n{combined_issue}\n"
        f"DETERMINISTIC REFERENCE RULES:\n{context}\n"
        "TASK:\n"
        "1. Identify applicable rules.\n"
        "2. Provide a 'Formal Ruling' (Compliant/Non-Compliant/Warning).\n"
        "3. Cite Rule Titles and Deterministic IDs.\n"
        "4. Provide forensic justification."
    )

    ruling_text = "LLM Inference required for automated ruling."
    if llm_endpoint and llm_key:
        try:
            headers = {"Authorization": f"Bearer {llm_key}", "Content-Type": "application/json"}
            payload = {
                "model": llm_model,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0,
            }
            response = requests.post(
                _chat_completions_url(llm_endpoint), json=payload, headers=headers, timeout=45
            )
            # Surface HTTP failures as such instead of a KeyError on 'choices'.
            response.raise_for_status()
            content = response.json()['choices'][0]['message']['content']
            if not isinstance(content, str):
                raise ValueError("response contained no text content")
            ruling_text = content
        except Exception as e:
            # Inference failures become the ruling text so they are shown and logged.
            ruling_text = f"Inference Error: {str(e)}"

    audio_file = generate_tts(ruling_text)
    rulings_log = load_data(RULINGS_FILE)
    new_ruling = {
        "timestamp": datetime.now().isoformat(),
        "issue_summary": combined_issue[:200],
        "ruling": ruling_text,
        "ruling_hash": hashlib.sha256(ruling_text.encode()).hexdigest(),
    }
    rulings_log.append(new_ruling)
    save_data(rulings_log, RULINGS_FILE)
    return ruling_text, f"Ruling Logged (ID: {new_ruling['ruling_hash'][:8]})", audio_file

def sync_all(token, dataset_id):
    """Upload the rule file, and the rulings log if present, to a Hub dataset repo.

    Args:
        token: Hugging Face token passed to each upload.
        dataset_id: Repository id of the target dataset.

    Returns:
        A status message: a credentials error, a success confirmation, or
        ``Sync Failed: ...`` carrying the text of the raised exception.
    """
    if not (token and dataset_id):
        return "Error: Missing credentials."
    api = HfApi()
    try:
        # The rule file is always uploaded; the rulings log only if it exists.
        for local_name, always_upload in ((DATA_FILE, True), (RULINGS_FILE, False)):
            if always_upload or os.path.exists(local_name):
                api.upload_file(
                    path_or_fileobj=local_name,
                    path_in_repo=local_name,
                    repo_id=dataset_id,
                    repo_type="dataset",
                    token=token,
                )
    except Exception as e:
        return f"Sync Failed: {str(e)}"
    return "Full Sync Successful."

def view_stats():
    """Summarize the stored rules as one ``act: count`` line per act.

    Returns:
        ``"Empty"`` when no rules are stored; otherwise the per-act counts
        joined by newlines, in order of each act's first appearance.
    """
    rules = load_data(DATA_FILE)
    if not rules:
        return "Empty"
    per_act = {}
    for rule in rules:
        act_name = rule['act']
        per_act[act_name] = per_act.get(act_name, 0) + 1
    lines = [f"{act_name}: {count}" for act_name, count in per_act.items()]
    return "\n".join(lines)

# --- GRADIO UI ---

# Markdown rendered on the "User Manual" tab. The heading emoji were stored as
# mis-decoded UTF-8 byte sequences and are restored here.
USER_MANUAL_MD = """
# ⚖️ Legislation & FCA Ruling Manager: User Manual

## 🛠️ Building Your Rulebase
1. **Manual Entry**: Go to **Manage Rules** to paste specific sections from acts.
2. **Document Ingestion**: Upload **PDF, DOCX, or TXT** files of internal policies or acts.
3. **FCA Automation**: Use the **FCA Guidelines** tab to pull the **12 Principles for Businesses (PRIN)**.

## 🏛️ Ruling on Issues
1. **Describing the Issue**: Go to **Rule on Issues**. Upload an **Issue Document** or type a description.
2. **Generating the Ruling**: Ensure your **Inference Settings** are configured, then click **Generate Formal Ruling**.
3. **Audible Delivery (TTS)**: Click play on the **Audio Player** to hear the ruling read aloud.

## ☁️ Sync & Audit
- Go to **Sync & Audit**, enter your **HF Write Token** and **Dataset ID**, and click **Sync Everything**.
"""

# Build the Gradio interface: one tab per workflow, with the event wiring at
# the end once every component exists.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ⚖️ Legislation & FCA Ruling Manager")

    # Tab: describe an issue (text and/or document) and obtain a ruling.
    with gr.Tab("🏛️ Rule on Issues"):
        gr.Markdown("### 📂 Issue Document Intake")
        with gr.Row():
            with gr.Column():
                issue_file = gr.File(label="Upload Issue Document (PDF/DOCX/TXT)", file_count="single")
                issue_desc = gr.TextArea(label="Additional Context / Issue Description")
                with gr.Accordion("⚙️ Inference Settings", open=False):
                    # rule_on_issue appends "/v1/chat/completions" itself, so the
                    # default base URL must not already end in "/v1".
                    end = gr.Textbox(label="Endpoint", value="http://localhost:11434")
                    key = gr.Textbox(label="Key", type="password")
                    mod = gr.Textbox(label="Model", value="llama3")
                rule_btn = gr.Button("Generate Formal Ruling", variant="primary")
            with gr.Column():
                ruling_out = gr.Markdown(label="Official Ruling")
                audio_out = gr.Audio(label="Audible Ruling", interactive=False)
                log_status = gr.Textbox(label="Audit Status")

    # Tab: add rules from a document or by hand, and inspect the inventory.
    with gr.Tab("➕ Manage Rules"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("#### 📂 Add Legislation/Handbook via File")
                doc_act = gr.Textbox(label="Act/Source Name")
                doc_in = gr.File(label="Upload Rule Document")
                doc_btn = gr.Button("Process Rule")
                gr.Markdown("---")
                gr.Markdown("#### ✍️ Manual Rule Entry")
                m_act = gr.Textbox(label="Act")
                m_tit = gr.Textbox(label="Title")
                m_src = gr.Textbox(label="Source")
                m_txt = gr.TextArea(label="Text")
                m_btn = gr.Button("Add Rule")
            with gr.Column():
                op_status = gr.Textbox(label="Status")
                # Computed once while the UI is built; "Refresh Inventory" updates it.
                stats_out = gr.Textbox(label="Dataset Inventory", value=view_stats())
                refresh_btn = gr.Button("Refresh Inventory")

    with gr.Tab("🏦 FCA Guidelines"):
        gr.Markdown("### 🛠️ FCA Handbook Automation")
        fca_btn = gr.Button("Ingest FCA PRIN Principles", variant="secondary")
        fca_status = gr.Textbox(label="FCA Ingestion Status")

    with gr.Tab("☁️ Sync & Audit"):
        hf_t = gr.Textbox(label="HF Token", type="password")
        hf_d = gr.Textbox(label="Dataset ID")
        s_btn = gr.Button("Sync Everything to Dataset")
        s_out = gr.Textbox(label="Sync Status")

    with gr.Tab("📖 User Manual"):
        gr.Markdown(USER_MANUAL_MD)

    # --- Event wiring ---
    rule_btn.click(fn=rule_on_issue, inputs=[issue_desc, issue_file, end, key, mod], outputs=[ruling_out, log_status, audio_out])
    doc_btn.click(fn=process_rule_document, inputs=[doc_in, doc_act], outputs=op_status)
    # Input order follows add_rule_manually(act, title, text, source).
    m_btn.click(fn=add_rule_manually, inputs=[m_act, m_tit, m_txt, m_src], outputs=op_status)
    refresh_btn.click(fn=view_stats, outputs=stats_out)
    s_btn.click(fn=sync_all, inputs=[hf_t, hf_d], outputs=s_out)
    # NOTE(review): this file defines no FCA ingestion routine, so the button
    # previously did nothing when clicked. Report that explicitly until a real
    # handler is written.
    fca_btn.click(
        fn=lambda: "FCA PRIN ingestion is not implemented yet; add the principles via Manage Rules.",
        outputs=fca_status,
    )

if __name__ == "__main__":
    demo.launch()