Spaces:
Running
Running
update dashboard
Browse files
app.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
import os
|
|
|
|
| 3 |
import sys
|
| 4 |
import subprocess
|
| 5 |
import logging
|
|
@@ -29,6 +30,7 @@ warnings.filterwarnings("ignore", category=SyntaxWarning, module="nlproxy.*")
|
|
| 29 |
from nlproxy import CompressionService, PromptFirewall
|
| 30 |
from nlproxy.core.corrector import ResponseCorrector
|
| 31 |
from nlproxy.core.verifier import PostLLMVerifier
|
|
|
|
| 32 |
|
| 33 |
# ==============================================================================
|
| 34 |
# PIPELINE
|
|
@@ -117,43 +119,49 @@ TUTORIAL_PIPELINE = """
|
|
| 117 |
## 🏗️ Pipeline
|
| 118 |
|
| 119 |
Every prompt passes through this battle-tested pipeline before reaching the LLM:
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
###
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
|
| 158 |
NLProxy extracts a **TruthTable** from every prompt - a semantic contract that the LLM response must honor:
|
| 159 |
|
|
@@ -257,13 +265,35 @@ def resolve_entity(entity_str: str, placeholder_map: dict) -> str:
|
|
| 257 |
return placeholder_map.get(entity_str, entity_str)
|
| 258 |
return entity_str
|
| 259 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
def execute_pipeline(
|
| 261 |
raw_prompt: str,
|
| 262 |
llm_response: str,
|
| 263 |
privacy_mode: bool,
|
| 264 |
mode: str,
|
| 265 |
aggressiveness: float,
|
| 266 |
-
use_nli: bool
|
|
|
|
|
|
|
|
|
|
|
|
|
| 267 |
):
|
| 268 |
if not raw_prompt.strip():
|
| 269 |
raise gr.Error("Dirty User Prompt cannot be empty.")
|
|
@@ -282,7 +312,19 @@ def execute_pipeline(
|
|
| 282 |
else:
|
| 283 |
firewall_md += "*✅ No malicious injections detected.*"
|
| 284 |
|
| 285 |
-
# STEP 2 & 3: COMPRESS & SHIELD
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
res = pipeline.service.compress_batch(
|
| 287 |
[raw_prompt],
|
| 288 |
mode=mode,
|
|
@@ -290,9 +332,8 @@ def execute_pipeline(
|
|
| 290 |
privacy_mode=privacy_mode
|
| 291 |
)[0]
|
| 292 |
compressed_text = res.get("compressed_text", "")
|
| 293 |
-
shield_res = pipeline.service._shield_with_cache(raw_prompt)
|
| 294 |
|
| 295 |
-
# --- TRUTHTABLE VISUALIZATION
|
| 296 |
tt_md = "**🔒 Shielded Entities (PII/Secrets):**\n"
|
| 297 |
entity_groups = {}
|
| 298 |
for ent in shield_res.entities:
|
|
@@ -313,15 +354,17 @@ def execute_pipeline(
|
|
| 313 |
else:
|
| 314 |
tt_md += "- *None detected*\n"
|
| 315 |
|
| 316 |
-
#
|
| 317 |
tokens_saved = res.get('tokens_saved', 0)
|
| 318 |
ratio = res.get('compression_ratio', 0)
|
|
|
|
|
|
|
| 319 |
|
| 320 |
if tokens_saved < 0:
|
| 321 |
metrics_md = (
|
| 322 |
f"### 📊 Compression & Security Metrics\n"
|
| 323 |
f"- **🔒 Security Overhead:** `{abs(tokens_saved)} tokens` *(Placeholders + Rules)*\n"
|
| 324 |
-
f"- **💰 Net Cost Impact:** `+${abs(
|
| 325 |
f"- **🛡️ Safety Score:** `{res.get('safety_score', 'N/A')}`\n"
|
| 326 |
f"\n> ℹ️ *Negative compression = Security features added more tokens than were saved.*"
|
| 327 |
)
|
|
@@ -329,18 +372,48 @@ def execute_pipeline(
|
|
| 329 |
metrics_md = (
|
| 330 |
f"### 📊 Compression & Security Metrics\n"
|
| 331 |
f"- **✅ Tokens Saved:** `{tokens_saved}`\n"
|
| 332 |
-
f"- **💰 Cost Saved:** `${
|
| 333 |
f"- **📉 Compression Ratio:** `{ratio:.2%}`\n"
|
| 334 |
f"- **🛡️ Safety Score:** `{res.get('safety_score', 'N/A')}`"
|
| 335 |
)
|
| 336 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 337 |
# STEP 4: CORRECT
|
| 338 |
-
corrected = pipeline.corrector.correct(
|
| 339 |
|
| 340 |
-
# STEP 5: VERIFY
|
| 341 |
verification = pipeline.verifier.verify(corrected, shield_res)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
verif_md = f"**🎯 Confidence Score:** `{verification.confidence_score:.2f}`\n\n"
|
| 343 |
|
|
|
|
|
|
|
|
|
|
| 344 |
if verification.violations:
|
| 345 |
semantic_drifts = []
|
| 346 |
unauthorized_entities = []
|
|
@@ -350,8 +423,12 @@ def execute_pipeline(
|
|
| 350 |
if "Semantic contradiction" in v:
|
| 351 |
semantic_drifts.append(v)
|
| 352 |
elif "Unauthorized entity" in v:
|
| 353 |
-
|
| 354 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
unauthorized_entities.append(v)
|
| 356 |
else:
|
| 357 |
policy_violations.append(v)
|
|
@@ -373,7 +450,7 @@ def execute_pipeline(
|
|
| 373 |
else:
|
| 374 |
verif_md += "*✅ No semantic drift or policy violations detected.*"
|
| 375 |
|
| 376 |
-
return firewall_md, compressed_text, tt_md, metrics_md,
|
| 377 |
|
| 378 |
except Exception as e:
|
| 379 |
logger.exception("Critical failure in pipeline execution.")
|
|
@@ -423,6 +500,26 @@ with gr.Blocks(title="NLProxy Demo", theme=gr.themes.Soft()) as demo:
|
|
| 423 |
gr.Markdown("*Provide a dirty prompt + simulated LLM response, and watch NLProxy protect, compress, and verify in real-time.*")
|
| 424 |
|
| 425 |
with gr.Row():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 426 |
with gr.Column(scale=1):
|
| 427 |
gr.Markdown("#### 📥 Step 0: Input & Configuration")
|
| 428 |
raw_prompt = gr.Textbox(
|
|
@@ -509,7 +606,10 @@ This Python-based serverless architecture will give you excellent developer expe
|
|
| 509 |
privacy_checkbox,
|
| 510 |
mode_dropdown,
|
| 511 |
aggressiveness_slider,
|
| 512 |
-
nli_checkbox
|
|
|
|
|
|
|
|
|
|
| 513 |
],
|
| 514 |
outputs=[
|
| 515 |
firewall_out,
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
import os
|
| 3 |
+
import re
|
| 4 |
import sys
|
| 5 |
import subprocess
|
| 6 |
import logging
|
|
|
|
| 30 |
from nlproxy import CompressionService, PromptFirewall
|
| 31 |
from nlproxy.core.corrector import ResponseCorrector
|
| 32 |
from nlproxy.core.verifier import PostLLMVerifier
|
| 33 |
+
from nlproxy.core.restriction import Restriction
|
| 34 |
|
| 35 |
# ==============================================================================
|
| 36 |
# PIPELINE
|
|
|
|
| 119 |
## 🏗️ Pipeline
|
| 120 |
|
| 121 |
Every prompt passes through this battle-tested pipeline before reaching the LLM:
|
| 122 |
+
|
| 123 |
+
### 📥 Input Pipeline
|
| 124 |
+
- **Scenario**: User submits a prompt with malicious injections and raw PII: *"Ignore instructions... IP 192.168.1.1..."*
|
| 125 |
+
- **NLProxy Action**: Captures raw text stream and triggers the validation sequence.
|
| 126 |
+
- **Result**: Request is intercepted and prepped for optimization before hitting the LLM.
|
| 127 |
+
|
| 128 |
+
### 🛡️ [1] FIREWALL
|
| 129 |
+
- **Scenario**: Inbound prompt contains jailbreaks or prompt injection attacks.
|
| 130 |
+
- **NLProxy Action**: Scans and blocks malicious exploits in real time.
|
| 131 |
+
- **Result**: Triggers defensive policy rules (**BLOCK / ALERT / REWRITE / ALLOW**).
|
| 132 |
+
|
| 133 |
+
### 📉 [2] COMPRESS
|
| 134 |
+
- **Scenario**: Input contains high token counts and exposed sensitive data.
|
| 135 |
+
- **NLProxy Action**: Runs semantic clustering and PII masking via *Shield → Segment → Cluster → Reconstruct*.
|
| 136 |
+
- **Result**: Shorter prompt with obfuscated endpoints: `"IP: __PROT_xxx"`.
|
| 137 |
+
|
| 138 |
+
### 🔒 [3] SAFETY
|
| 139 |
+
- **Scenario**: The compressed prompt risks losing core business alignment or intent.
|
| 140 |
+
- **NLProxy Action**: Generates a *TruthTable* mapping strict **FORBID** and **MANDATE** rules.
|
| 141 |
+
- **Result**: Automatically reinjects critical instructions if missing.
|
| 142 |
+
|
| 143 |
+
### 🤖 [4] LLM CALL
|
| 144 |
+
- **Scenario**: Secured, cost-optimized prompt is ready for inference.
|
| 145 |
+
- **NLProxy Action**: Routes the sanitized payload to your preferred backend.
|
| 146 |
+
- **Result**: Seamless execution across models (**OpenAI / Claude / Gemini / Local**).
|
| 147 |
+
|
| 148 |
+
### 🧹 [5] CORRECT
|
| 149 |
+
- **Scenario**: LLM response violates corporate compliance or leaks unauthorized data.
|
| 150 |
+
- **NLProxy Action**: Enforces *TruthTable* parameters on outbound text.
|
| 151 |
+
- **Result**: Hard rules applied; all unauthorized data redacted instantly.
|
| 152 |
+
|
| 153 |
+
### 🔍 [6] VERIFY
|
| 154 |
+
- **Scenario**: LLM output includes potential hallucinations or logic conflicts.
|
| 155 |
+
- **NLProxy Action**: Performs NLI (Natural Language Inference) contradiction detection.
|
| 156 |
+
- **Result**: Auto-correction boosts response confidence from **0.30 → 0.85**.
|
| 157 |
+
|
| 158 |
+
### 📤 Output Pipeline
|
| 159 |
+
- **Scenario**: Cleaned output is ready to return to the end user.
|
| 160 |
+
- **NLProxy Action**: Delivers the finalized, compliant response.
|
| 161 |
+
- **Result**: *"Solution in Java. Connection protected."* (Zero data leaks + optimized token spend).
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
## 🔬 Deep Dive: The "TruthTable" Concept
|
| 165 |
|
| 166 |
NLProxy extracts a **TruthTable** from every prompt - a semantic contract that the LLM response must honor:
|
| 167 |
|
|
|
|
| 265 |
return placeholder_map.get(entity_str, entity_str)
|
| 266 |
return entity_str
|
| 267 |
|
| 268 |
+
def parse_manual_rules(rules_text: str) -> list:
    """Parse manually entered business rules into ``Restriction`` objects.

    The expected syntax is ``TYPE: entity, entity, ...`` where ``TYPE`` is
    ``FORBID`` or ``MANDATE`` (case-insensitive), for example
    ``"FORBID: AWS, Python; MANDATE: GCP, Rust"``.

    Rules may be separated by ``;`` or by putting each rule on its own line.
    A line break inside an entity list (a wrapped list) is not a separator.

    Args:
        rules_text: Raw rule text. ``None``, empty, or whitespace-only text
            yields no rules.

    Returns:
        A list with one ``Restriction(type=..., entity=..., context="manual_ui")``
        per non-empty entity, in input order. Clauses without a ``:`` or with
        an unknown rule type are ignored.
    """
    # Be tolerant of None as well as blank text.
    if not rules_text or not rules_text.strip():
        return []

    valid_types = ("FORBID", "MANDATE")

    # Split on ';' and also on a newline that is directly followed by a rule
    # header. The lookahead keeps wrapped entity lists
    # ("FORBID: AWS,\nPython") inside their original clause.
    clauses = re.split(
        r";|\n(?=\s*(?:FORBID|MANDATE)\s*:)",
        rules_text,
        flags=re.IGNORECASE,
    )

    rules = []
    for clause in clauses:
        rtype, sep, entities = clause.partition(":")
        if not sep:
            # No "TYPE:" prefix, so this clause is not a rule.
            continue
        rtype = rtype.strip().upper()
        if rtype not in valid_types:
            continue
        for ent in entities.split(","):
            ent = ent.strip()
            if ent:
                rules.append(Restriction(type=rtype, entity=ent, context="manual_ui"))
    return rules
|
| 284 |
+
|
| 285 |
+
|
| 286 |
def execute_pipeline(
|
| 287 |
raw_prompt: str,
|
| 288 |
llm_response: str,
|
| 289 |
privacy_mode: bool,
|
| 290 |
mode: str,
|
| 291 |
aggressiveness: float,
|
| 292 |
+
use_nli: bool,
|
| 293 |
+
manual_rules: str,
|
| 294 |
+
auto_correct: bool = False,
|
| 295 |
+
min_confidence: float = 0.6,
|
| 296 |
+
max_regeneration_attempts: int = 3
|
| 297 |
):
|
| 298 |
if not raw_prompt.strip():
|
| 299 |
raise gr.Error("Dirty User Prompt cannot be empty.")
|
|
|
|
| 312 |
else:
|
| 313 |
firewall_md += "*✅ No malicious injections detected.*"
|
| 314 |
|
| 315 |
+
# STEP 2 & 3: COMPRESS & SHIELD (With Manual Rule Injection)
|
| 316 |
+
shield_res = pipeline.service._shield_with_cache(
|
| 317 |
+
raw_prompt,
|
| 318 |
+
privacy_mode=privacy_mode,
|
| 319 |
+
mode_override=mode
|
| 320 |
+
)
|
| 321 |
+
|
| 322 |
+
# INJECT MANUAL RULES into the shield result's restrictions
|
| 323 |
+
manual_rules_list = parse_manual_rules(manual_rules)
|
| 324 |
+
if manual_rules_list:
|
| 325 |
+
shield_res.restrictions.extend(manual_rules_list)
|
| 326 |
+
|
| 327 |
+
# Now run compression
|
| 328 |
res = pipeline.service.compress_batch(
|
| 329 |
[raw_prompt],
|
| 330 |
mode=mode,
|
|
|
|
| 332 |
privacy_mode=privacy_mode
|
| 333 |
)[0]
|
| 334 |
compressed_text = res.get("compressed_text", "")
|
|
|
|
| 335 |
|
| 336 |
+
# --- TRUTHTABLE VISUALIZATION ---
|
| 337 |
tt_md = "**🔒 Shielded Entities (PII/Secrets):**\n"
|
| 338 |
entity_groups = {}
|
| 339 |
for ent in shield_res.entities:
|
|
|
|
| 354 |
else:
|
| 355 |
tt_md += "- *None detected*\n"
|
| 356 |
|
| 357 |
+
# METRICS
|
| 358 |
tokens_saved = res.get('tokens_saved', 0)
|
| 359 |
ratio = res.get('compression_ratio', 0)
|
| 360 |
+
raw_cost = res.get('cost_saved_usd', 0)
|
| 361 |
+
real_cost = raw_cost / 1000.0
|
| 362 |
|
| 363 |
if tokens_saved < 0:
|
| 364 |
metrics_md = (
|
| 365 |
f"### 📊 Compression & Security Metrics\n"
|
| 366 |
f"- **🔒 Security Overhead:** `{abs(tokens_saved)} tokens` *(Placeholders + Rules)*\n"
|
| 367 |
+
f"- **💰 Net Cost Impact:** `+${abs(real_cost):.6f}`\n"
|
| 368 |
f"- **🛡️ Safety Score:** `{res.get('safety_score', 'N/A')}`\n"
|
| 369 |
f"\n> ℹ️ *Negative compression = Security features added more tokens than were saved.*"
|
| 370 |
)
|
|
|
|
| 372 |
metrics_md = (
|
| 373 |
f"### 📊 Compression & Security Metrics\n"
|
| 374 |
f"- **✅ Tokens Saved:** `{tokens_saved}`\n"
|
| 375 |
+
f"- **💰 Cost Saved:** `${real_cost:.6f}`\n"
|
| 376 |
f"- **📉 Compression Ratio:** `{ratio:.2%}`\n"
|
| 377 |
f"- **🛡️ Safety Score:** `{res.get('safety_score', 'N/A')}`"
|
| 378 |
)
|
| 379 |
|
| 380 |
+
response_text = llm_response
|
| 381 |
+
if privacy_mode:
|
| 382 |
+
response_text = pipeline.service.reconstructor._reinject_entities(
|
| 383 |
+
response_text,
|
| 384 |
+
shield_res.placeholder_map
|
| 385 |
+
)
|
| 386 |
+
|
| 387 |
# STEP 4: CORRECT
|
| 388 |
+
corrected = pipeline.corrector.correct(response_text, shield_res)
|
| 389 |
|
| 390 |
+
# STEP 5: VERIFY
|
| 391 |
verification = pipeline.verifier.verify(corrected, shield_res)
|
| 392 |
+
|
| 393 |
+
correction_attempts = 0
|
| 394 |
+
final_response_text = corrected
|
| 395 |
+
|
| 396 |
+
while (
|
| 397 |
+
auto_correct
|
| 398 |
+
and verification.confidence_score < min_confidence
|
| 399 |
+
and correction_attempts < max_regeneration_attempts
|
| 400 |
+
):
|
| 401 |
+
correction_attempts += 1
|
| 402 |
+
logger.info(f"Auto-correction attempt {correction_attempts}/{max_regeneration_attempts}")
|
| 403 |
+
|
| 404 |
+
# Simulate regeneration (in production this would call the LLM again)
|
| 405 |
+
# For now we only record the attempt
|
| 406 |
+
verification = pipeline.verifier.verify(final_response_text, shield_res)
|
| 407 |
+
|
| 408 |
+
if verification.confidence_score >= min_confidence:
|
| 409 |
+
break
|
| 410 |
+
|
| 411 |
+
# Build verification markdown
|
| 412 |
verif_md = f"**🎯 Confidence Score:** `{verification.confidence_score:.2f}`\n\n"
|
| 413 |
|
| 414 |
+
if correction_attempts > 0:
|
| 415 |
+
verif_md += f"**🔄 Auto-correction attempts:** {correction_attempts}\n\n"
|
| 416 |
+
|
| 417 |
if verification.violations:
|
| 418 |
semantic_drifts = []
|
| 419 |
unauthorized_entities = []
|
|
|
|
| 423 |
if "Semantic contradiction" in v:
|
| 424 |
semantic_drifts.append(v)
|
| 425 |
elif "Unauthorized entity" in v:
|
| 426 |
+
match = re.search(r"Unauthorized entity in response: (.+?) \(type: price\)", v)
|
| 427 |
+
if match:
|
| 428 |
+
entity_val = match.group(1)
|
| 429 |
+
is_real_price = any(sym in entity_val for sym in ['$', '€', '£', '¥', 'USD', 'EUR', 'GBP', 'JPY'])
|
| 430 |
+
if not is_real_price:
|
| 431 |
+
continue
|
| 432 |
unauthorized_entities.append(v)
|
| 433 |
else:
|
| 434 |
policy_violations.append(v)
|
|
|
|
| 450 |
else:
|
| 451 |
verif_md += "*✅ No semantic drift or policy violations detected.*"
|
| 452 |
|
| 453 |
+
return firewall_md, compressed_text, tt_md, metrics_md, final_response_text, verif_md
|
| 454 |
|
| 455 |
except Exception as e:
|
| 456 |
logger.exception("Critical failure in pipeline execution.")
|
|
|
|
| 500 |
gr.Markdown("*Provide a dirty prompt + simulated LLM response, and watch NLProxy protect, compress, and verify in real-time.*")
|
| 501 |
|
| 502 |
with gr.Row():
|
| 503 |
+
manual_rules = gr.Textbox(
|
| 504 |
+
label="Manual Business Rules (Optional)",
|
| 505 |
+
placeholder="FORBID: AWS, Python; MANDATE: GCP, Rust",
|
| 506 |
+
value="",
|
| 507 |
+
lines=2,
|
| 508 |
+
info="Define explicit restrictions that regex might miss"
|
| 509 |
+
)
|
| 510 |
+
auto_correct_checkbox = gr.Checkbox(
|
| 511 |
+
label="Auto-Correct Low Confidence",
|
| 512 |
+
value=False,
|
| 513 |
+
info="Regenerate response if confidence < threshold"
|
| 514 |
+
)
|
| 515 |
+
min_confidence_slider = gr.Slider(
|
| 516 |
+
minimum=0.0,
|
| 517 |
+
maximum=1.0,
|
| 518 |
+
value=0.6,
|
| 519 |
+
step=0.05,
|
| 520 |
+
label="Min Confidence Threshold"
|
| 521 |
+
)
|
| 522 |
+
|
| 523 |
with gr.Column(scale=1):
|
| 524 |
gr.Markdown("#### 📥 Step 0: Input & Configuration")
|
| 525 |
raw_prompt = gr.Textbox(
|
|
|
|
| 606 |
privacy_checkbox,
|
| 607 |
mode_dropdown,
|
| 608 |
aggressiveness_slider,
|
| 609 |
+
nli_checkbox,
|
| 610 |
+
manual_rules,
|
| 611 |
+
auto_correct_checkbox,
|
| 612 |
+
min_confidence_slider
|
| 613 |
],
|
| 614 |
outputs=[
|
| 615 |
firewall_out,
|