Lexie / app.py
virginialevy's picture
Upload app.py
512acac verified
# app.py
from __future__ import annotations
import os, time, json
from pathlib import Path
from datetime import datetime
from typing import List, Tuple, Dict, Any
import gradio as gr
from lexie.call_agent import route as call_route
from lexie.pdf_reporter import generate_report
# Directory that contains this file.
BASE_DIR = Path(__file__).resolve().parent
# Runtime artifacts go under /data/lexie when a /data directory exists,
# otherwise under ./runtime next to this file.
# NOTE(review): /data is presumably a persistent-storage mount — confirm.
RUNTIME_BASE = Path("/data/lexie") if Path("/data").exists() else (BASE_DIR / "runtime")
# JSON result dumps are written here (see _save_json).
RUNTIME_LOGS = RUNTIME_BASE / "logs"
# Generated PDF reports are written here (see _gen_pdf).
RUNTIME_OUT = RUNTIME_BASE / "outputs"
# Upper bound, in megabytes, for an uploaded PDF (checked in analyze_interface).
MAX_PDF_MB = 10
# Policies used when the caller passes an empty selection.
DEFAULT_POLICIES = ["gdpr", "ai_act"]
def _ensure_dirs() -> None:
    """Create the runtime log and output directories when they are missing."""
    for directory in (RUNTIME_LOGS, RUNTIME_OUT):
        directory.mkdir(parents=True, exist_ok=True)
def _ts() -> str:
return datetime.now().strftime("%Y%m%d-%H%M%S")
def _risk_badge(result: Dict[str, Any]) -> str:
score = int(result.get("risk_score", 0))
level = str(result.get("risk_level", "unknown")).lower()
color = {"low": "green", "medium": "orange", "high": "red"}.get(level, "gray")
fname = result.get("document_name") or ""
file_str = f" — **File:** `{fname}`" if fname else ""
return f"**Risk:** <span style='color:{color};font-weight:bold'>{level.upper()}</span> — **Score:** {score}/100{file_str}"
def _violations_table(result):
rows = []
vios = result.get("violations", []) or []
cites = result.get("citations", []) or []
for i, v in enumerate(vios):
page = cites[i].get("page") if i < len(cites) else None
page = page if page not in (None, "", "?") else "—"
rows.append([
v.get("law", ""),
v.get("article", "—"),
v.get("title", ""),
v.get("reason", ""),
page,
])
return rows
def _save_json(result: Dict[str, Any]) -> Path:
    """Dump *result* as pretty-printed UTF-8 JSON into the runtime log directory.

    Returns:
        Path of the written ``result-<timestamp>.json`` file.
    """
    _ensure_dirs()
    target = RUNTIME_LOGS / ("result-" + _ts() + ".json")
    serialized = json.dumps(result, ensure_ascii=False, indent=2)
    target.write_text(serialized, encoding="utf-8")
    print(f"[saved JSON] {target}")
    return target
def _gen_pdf(result: Dict[str, Any]) -> Path:
    """Generate the PDF report for *result* in the runtime output directory.

    Returns:
        Path of the written ``report_<timestamp>.pdf`` file.
    """
    _ensure_dirs()
    destination = RUNTIME_OUT / ("report_{}.pdf".format(_ts()))
    generate_report(result, str(destination))
    print(f"[saved PDF] {destination}")
    return destination
def analyze_interface(
input_text: str,
input_pdf_path: str | None,
policies: List[str],
top_k: int,
) -> Tuple[str, List[Dict[str, Any]], str | None]:
if not os.getenv("OPENAI_API_KEY"):
return ("`OPENAI_API_KEY` is missing. Set it and retry.", [], None)
input_text = (input_text or "").strip()
if not input_pdf_path and not input_text:
return ("Please provide a PDF or some text.", [], None)
if input_pdf_path:
try:
size_mb = os.path.getsize(input_pdf_path) / (1024 * 1024)
except OSError:
return ("Uploaded PDF is not readable. Try another file.", [], None)
if size_mb > MAX_PDF_MB:
return (f"PDF too large: {size_mb:.1f} MB. Max {MAX_PDF_MB} MB.", [], None)
payload = {
"mode": "document",
"document_path": input_pdf_path,
"user_text": "",
"policies": policies or DEFAULT_POLICIES,
"top_k": int(top_k),
}
else:
payload = {
"mode": "free_text",
"document_path": "",
"user_text": input_text,
"policies": policies or DEFAULT_POLICIES,
"top_k": int(top_k),
}
result = call_route(payload)
# Nome file per il badge se PDF
try:
if input_pdf_path and payload.get("mode") == "document":
fname = os.path.basename(input_pdf_path)
result.setdefault("document_name", fname)
result.setdefault("document_path", input_pdf_path)
except Exception:
pass
if os.getenv("LEXIE_SAVE_JSON", "0") == "1":
_save_json(result)
# Genera PDF scaricabile
try:
pdf_path = _gen_pdf(result)
except Exception:
badge = _risk_badge(result)
table = _violations_table(result)
return (badge + " — PDF generation failed.", table, None)
badge = _risk_badge(result)
table = _violations_table(result)
return (badge, table, str(pdf_path))
def build_ui() -> gr.Blocks:
    """Assemble the Gradio interface and wire the Analyze button.

    Layout: a row with the free-text box and the PDF upload, a row with the
    policy checkboxes and the Top-K slider, the Analyze button, then the risk
    badge, the violations table and the report download.

    Returns:
        The constructed ``gr.Blocks`` app; the caller launches it.
    """
    _ensure_dirs()
    with gr.Blocks(title="Lexie — Compliance Copilot") as demo:
        gr.Markdown("# Lexie — Compliance Copilot (MVP)")
        # The size hint is interpolated from MAX_PDF_MB so it cannot drift from
        # the limit enforced in analyze_interface.
        gr.Markdown(
            f"Upload a **PDF** (≤ {MAX_PDF_MB} MB) or paste **free text**. "
            "Policies default to GDPR + AI Act."
        )
        with gr.Row():
            text_input = gr.Textbox(label="Free text", placeholder="Paste your text here...", lines=8)
            pdf_input = gr.File(label="PDF upload", file_types=[".pdf"], type="filepath")
        with gr.Row():
            policy_input = gr.CheckboxGroup(
                choices=["gdpr", "ai_act"],
                value=DEFAULT_POLICIES,
                label="Policies",
            )
            top_k_input = gr.Slider(1, 12, value=8, step=1, label="Top-K")
        analyze_button = gr.Button("Analyze", variant="primary")
        out_badge = gr.Markdown(label="Risk")
        out_table = gr.Dataframe(
            headers=["Law", "Article", "Title", "Reason", "Page"],
            label="Violations",
            interactive=False,
        )
        out_pdf = gr.File(label="Download report (PDF)")
        # Inputs and outputs follow analyze_interface's parameter and return order.
        analyze_button.click(
            analyze_interface,
            inputs=[text_input, pdf_input, policy_input, top_k_input],
            outputs=[out_badge, out_table, out_pdf],
        )
        # Note removed: no misleading messages about where the PDF is saved.
    return demo
if __name__ == "__main__":
    # Script entry point: announce start-up, build the interface, then serve it.
    print("Starting Lexie UI...")
    ui = build_ui()
    # Called with no arguments, so Gradio's launch defaults apply.
    ui.launch()