Armando Medina committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,7 +5,6 @@ from presidio_anonymizer import AnonymizerEngine
|
|
| 5 |
from presidio_image_redactor import ImageRedactorEngine
|
| 6 |
import numpy as np
|
| 7 |
import re
|
| 8 |
-
import os
|
| 9 |
from docx import Document
|
| 10 |
from PIL import Image
|
| 11 |
import pytesseract
|
|
@@ -262,7 +261,7 @@ def agentic_compliance(doc, regime):
|
|
| 262 |
exec_summary = executive_summary_template(relevant, score, regime)
|
| 263 |
|
| 264 |
findings_md = "\n".join([
|
| 265 |
-
f"- **{f['entity']}** ({f['text']}), score: {f.get('score', 0):.2f}"
|
| 266 |
for f in relevant
|
| 267 |
]) if relevant else "No relevant PII found for this regime."
|
| 268 |
|
|
@@ -278,10 +277,10 @@ def agentic_compliance(doc, regime):
|
|
| 278 |
fname = doc.name.lower()
|
| 279 |
if fname.endswith((".png", ".jpg", ".jpeg")):
|
| 280 |
redacted_file_path = redact_image_with_presidio(doc.name)
|
| 281 |
-
redacted_image = redacted_file_path
|
| 282 |
elif fname.endswith(".pdf"):
|
| 283 |
redacted_file_path = redact_pdf_with_presidio(doc.name)
|
| 284 |
-
redacted_image = None
|
| 285 |
|
| 286 |
md = f"""### Compliance Regime: **{regime}**
|
| 287 |
**Executive Summary:**
|
|
@@ -295,40 +294,25 @@ def agentic_compliance(doc, regime):
|
|
| 295 |
{summary}
|
| 296 |
---
|
| 297 |
{legend_md}
|
| 298 |
-
---
|
| 299 |
-
**Redacted Document Preview:**
|
| 300 |
-
<details>
|
| 301 |
-
<summary>Show/Hide Redacted Text</summary>
|
| 302 |
-
</details>
|
| 303 |
"""
|
| 304 |
return md.strip(), redacted_path, redacted_file_path, redacted_image
|
| 305 |
|
| 306 |
-
def preview_file(file):
|
| 307 |
-
if not file:
|
| 308 |
-
return ""
|
| 309 |
-
if file.name.lower().endswith((".png", ".jpg", ".jpeg")):
|
| 310 |
-
return f"<img src='file={file.name}' width='500'/>"
|
| 311 |
-
elif file.name.lower().endswith(".pdf"):
|
| 312 |
-
return f"""<embed src='file={file.name}' width='100%' height='600px' type='application/pdf' />"""
|
| 313 |
-
else:
|
| 314 |
-
return "<i>No preview available for this file type.</i>"
|
| 315 |
|
| 316 |
with gr.Blocks(title="Agentic Compliance MCP Server") as demo:
|
| 317 |
gr.Markdown("# Agentic Compliance MCP\nUpload a document to check it for PII then select a compliance regime.")
|
| 318 |
with gr.Tab("Compliance Agent"):
|
| 319 |
doc = gr.File(label="Upload Document", file_types=SUPPORTED_FILE_TYPES)
|
| 320 |
-
preview = gr.HTML(label="Original Preview")
|
| 321 |
regime = gr.Dropdown(choices=list(COMPLIANCE_ENTITIES.keys()), label="Compliance Regime")
|
| 322 |
out = gr.Markdown(label="Compliance Output")
|
| 323 |
redacted_out = gr.File(label="Download Redacted Text")
|
| 324 |
file_redacted_out = gr.File(label="Download Redacted PDF/Image")
|
| 325 |
redacted_img = gr.Image(label="Redacted Image Preview")
|
| 326 |
|
| 327 |
-
doc.change(preview_file, inputs=doc, outputs=preview)
|
| 328 |
-
|
| 329 |
gr.Button("Run Compliance Agent").click(
|
| 330 |
agentic_compliance,
|
| 331 |
inputs=[doc, regime],
|
| 332 |
outputs=[out, redacted_out, file_redacted_out, redacted_img]
|
| 333 |
)
|
| 334 |
-
|
|
|
|
|
|
| 5 |
from presidio_image_redactor import ImageRedactorEngine
|
| 6 |
import numpy as np
|
| 7 |
import re
|
|
|
|
| 8 |
from docx import Document
|
| 9 |
from PIL import Image
|
| 10 |
import pytesseract
|
|
|
|
| 261 |
exec_summary = executive_summary_template(relevant, score, regime)
|
| 262 |
|
| 263 |
findings_md = "\n".join([
|
| 264 |
+
f"- **{f['entity']}** (`{f['text']}`), score: {f.get('score', 0):.2f}"
|
| 265 |
for f in relevant
|
| 266 |
]) if relevant else "No relevant PII found for this regime."
|
| 267 |
|
|
|
|
| 277 |
fname = doc.name.lower()
|
| 278 |
if fname.endswith((".png", ".jpg", ".jpeg")):
|
| 279 |
redacted_file_path = redact_image_with_presidio(doc.name)
|
| 280 |
+
redacted_image = redacted_file_path
|
| 281 |
elif fname.endswith(".pdf"):
|
| 282 |
redacted_file_path = redact_pdf_with_presidio(doc.name)
|
| 283 |
+
redacted_image = None
|
| 284 |
|
| 285 |
md = f"""### Compliance Regime: **{regime}**
|
| 286 |
**Executive Summary:**
|
|
|
|
| 294 |
{summary}
|
| 295 |
---
|
| 296 |
{legend_md}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
"""
|
| 298 |
return md.strip(), redacted_path, redacted_file_path, redacted_image
|
| 299 |
|
| 300 |
+
# ---- Gradio App UI: No previews ----
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
|
| 302 |
with gr.Blocks(title="Agentic Compliance MCP Server") as demo:
|
| 303 |
gr.Markdown("# Agentic Compliance MCP\nUpload a document to check it for PII then select a compliance regime.")
|
| 304 |
with gr.Tab("Compliance Agent"):
|
| 305 |
doc = gr.File(label="Upload Document", file_types=SUPPORTED_FILE_TYPES)
|
|
|
|
| 306 |
regime = gr.Dropdown(choices=list(COMPLIANCE_ENTITIES.keys()), label="Compliance Regime")
|
| 307 |
out = gr.Markdown(label="Compliance Output")
|
| 308 |
redacted_out = gr.File(label="Download Redacted Text")
|
| 309 |
file_redacted_out = gr.File(label="Download Redacted PDF/Image")
|
| 310 |
redacted_img = gr.Image(label="Redacted Image Preview")
|
| 311 |
|
|
|
|
|
|
|
| 312 |
gr.Button("Run Compliance Agent").click(
|
| 313 |
agentic_compliance,
|
| 314 |
inputs=[doc, regime],
|
| 315 |
outputs=[out, redacted_out, file_redacted_out, redacted_img]
|
| 316 |
)
|
| 317 |
+
|
| 318 |
+
demo.launch(mcp_server=True)
|