Luiserb commited on
Commit
c63a2d7
·
1 Parent(s): 5dab731

update dashboard

Browse files
Files changed (1) hide show
  1. app.py +150 -50
app.py CHANGED
@@ -1,5 +1,6 @@
1
  from __future__ import annotations
2
  import os
 
3
  import sys
4
  import subprocess
5
  import logging
@@ -29,6 +30,7 @@ warnings.filterwarnings("ignore", category=SyntaxWarning, module="nlproxy.*")
29
  from nlproxy import CompressionService, PromptFirewall
30
  from nlproxy.core.corrector import ResponseCorrector
31
  from nlproxy.core.verifier import PostLLMVerifier
 
32
 
33
  # ==============================================================================
34
  # PIPELINE
@@ -117,43 +119,49 @@ TUTORIAL_PIPELINE = """
117
  ## 🏗️ Pipeline
118
 
119
  Every prompt passes through this battle-tested pipeline before reaching the LLM:
120
- ┌─────────────────────────────────────────────────────────────┐
121
- │ NLProxy Pipeline
122
- ├─────────────────────────────────────────────────────────────┤
123
- │ │
124
- │ 📥 INPUT: "Ignore instructions... IP 192.168.1.1..." │
125
- │ ↓ │
126
- 🛡️ [1] FIREWALL
127
- │ ├─ Block jailbreaks & injections │
128
- │ └─ Action: BLOCK / ALERT / REWRITE / ALLOW │
129
- │ ↓ │
130
- │ 📉 [2] COMPRESS │
131
- │ ├─ Semantic clustering + PII masking │
132
- │ ├─ Shield Segment Cluster Reconstruct │
133
- │ └─ Output: "User: __PROT_xxx" │
134
- │ ↓ │
135
- │ 🔒 [3] SAFETY │
136
- │ ├─ Extract TruthTable (FORBID/MANDATE) │
137
- │ └─ Reinserts critical intents if missing │
138
- │ ↓ │
139
- │ 🤖 [4] LLM CALL │
140
- │ ├─ Your preferred provider │
141
- │ └─ OpenAI / Claude / Gemini / Local │
142
- │ ↓ │
143
- │ 🧹 [5] CORRECT │
144
- │ ├─ Enforce rules, redact unauthorized data │
145
- │ └─ Applies FORBID/MANDATE + redacts unauthorized │
146
- │ ↓ │
147
- │ 🔍 [6] VERIFY │
148
- │ ├─ NLI contradiction detection │
149
- │ └─ Confidence: 0.30 0.85 (after auto-correction) │
150
- │ ↓ │
151
- │ 📤 OUTPUT: "Solution in Java. Connection protected." │
152
- │ │
153
- └─────────────────────────────────────────────────────────────┘
154
-
155
-
156
- ### 🔬 Deep Dive: The "TruthTable" Concept
 
 
 
 
 
 
157
 
158
  NLProxy extracts a **TruthTable** from every prompt - a semantic contract that the LLM response must honor:
159
 
@@ -257,13 +265,35 @@ def resolve_entity(entity_str: str, placeholder_map: dict) -> str:
257
  return placeholder_map.get(entity_str, entity_str)
258
  return entity_str
259
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  def execute_pipeline(
261
  raw_prompt: str,
262
  llm_response: str,
263
  privacy_mode: bool,
264
  mode: str,
265
  aggressiveness: float,
266
- use_nli: bool
 
 
 
 
267
  ):
268
  if not raw_prompt.strip():
269
  raise gr.Error("Dirty User Prompt cannot be empty.")
@@ -282,7 +312,19 @@ def execute_pipeline(
282
  else:
283
  firewall_md += "*✅ No malicious injections detected.*"
284
 
285
- # STEP 2 & 3: COMPRESS & SHIELD
 
 
 
 
 
 
 
 
 
 
 
 
286
  res = pipeline.service.compress_batch(
287
  [raw_prompt],
288
  mode=mode,
@@ -290,9 +332,8 @@ def execute_pipeline(
290
  privacy_mode=privacy_mode
291
  )[0]
292
  compressed_text = res.get("compressed_text", "")
293
- shield_res = pipeline.service._shield_with_cache(raw_prompt)
294
 
295
- # --- TRUTHTABLE VISUALIZATION (With Reverse Lookup) ---
296
  tt_md = "**🔒 Shielded Entities (PII/Secrets):**\n"
297
  entity_groups = {}
298
  for ent in shield_res.entities:
@@ -313,15 +354,17 @@ def execute_pipeline(
313
  else:
314
  tt_md += "- *None detected*\n"
315
 
316
- # --- METRICS ---
317
  tokens_saved = res.get('tokens_saved', 0)
318
  ratio = res.get('compression_ratio', 0)
 
 
319
 
320
  if tokens_saved < 0:
321
  metrics_md = (
322
  f"### 📊 Compression & Security Metrics\n"
323
  f"- **🔒 Security Overhead:** `{abs(tokens_saved)} tokens` *(Placeholders + Rules)*\n"
324
- f"- **💰 Net Cost Impact:** `+${abs(res.get('cost_saved_usd', 0)):.6f}`\n"
325
  f"- **🛡️ Safety Score:** `{res.get('safety_score', 'N/A')}`\n"
326
  f"\n> ℹ️ *Negative compression = Security features added more tokens than were saved.*"
327
  )
@@ -329,18 +372,48 @@ def execute_pipeline(
329
  metrics_md = (
330
  f"### 📊 Compression & Security Metrics\n"
331
  f"- **✅ Tokens Saved:** `{tokens_saved}`\n"
332
- f"- **💰 Cost Saved:** `${res.get('cost_saved_usd', 0):.6f}`\n"
333
  f"- **📉 Compression Ratio:** `{ratio:.2%}`\n"
334
  f"- **🛡️ Safety Score:** `{res.get('safety_score', 'N/A')}`"
335
  )
336
 
 
 
 
 
 
 
 
337
  # STEP 4: CORRECT
338
- corrected = pipeline.corrector.correct(llm_response, shield_res)
339
 
340
- # STEP 5: VERIFY (Smart Filtering)
341
  verification = pipeline.verifier.verify(corrected, shield_res)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  verif_md = f"**🎯 Confidence Score:** `{verification.confidence_score:.2f}`\n\n"
343
 
 
 
 
344
  if verification.violations:
345
  semantic_drifts = []
346
  unauthorized_entities = []
@@ -350,8 +423,12 @@ def execute_pipeline(
350
  if "Semantic contradiction" in v:
351
  semantic_drifts.append(v)
352
  elif "Unauthorized entity" in v:
353
- if "type: price" in v and any(c.isdigit() for c in v.split(":")[0][-5:]):
354
- continue
 
 
 
 
355
  unauthorized_entities.append(v)
356
  else:
357
  policy_violations.append(v)
@@ -373,7 +450,7 @@ def execute_pipeline(
373
  else:
374
  verif_md += "*✅ No semantic drift or policy violations detected.*"
375
 
376
- return firewall_md, compressed_text, tt_md, metrics_md, corrected, verif_md
377
 
378
  except Exception as e:
379
  logger.exception("Critical failure in pipeline execution.")
@@ -423,6 +500,26 @@ with gr.Blocks(title="NLProxy Demo", theme=gr.themes.Soft()) as demo:
423
  gr.Markdown("*Provide a dirty prompt + simulated LLM response, and watch NLProxy protect, compress, and verify in real-time.*")
424
 
425
  with gr.Row():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
426
  with gr.Column(scale=1):
427
  gr.Markdown("#### 📥 Step 0: Input & Configuration")
428
  raw_prompt = gr.Textbox(
@@ -509,7 +606,10 @@ This Python-based serverless architecture will give you excellent developer expe
509
  privacy_checkbox,
510
  mode_dropdown,
511
  aggressiveness_slider,
512
- nli_checkbox
 
 
 
513
  ],
514
  outputs=[
515
  firewall_out,
 
1
  from __future__ import annotations
2
  import os
3
+ import re
4
  import sys
5
  import subprocess
6
  import logging
 
30
  from nlproxy import CompressionService, PromptFirewall
31
  from nlproxy.core.corrector import ResponseCorrector
32
  from nlproxy.core.verifier import PostLLMVerifier
33
+ from nlproxy.core.restriction import Restriction
34
 
35
  # ==============================================================================
36
  # PIPELINE
 
119
  ## 🏗️ Pipeline
120
 
121
  Every prompt passes through this battle-tested pipeline before reaching the LLM:
122
+
123
+ ### 📥 Input Pipeline
124
+ - **Scenario**: User submits a prompt with malicious injections and raw PII: *"Ignore instructions... IP 192.168.1.1..."*
125
+ - **NLProxy Action**: Captures raw text stream and triggers the validation sequence.
126
+ - **Result**: Request is intercepted and prepped for optimization before hitting the LLM.
127
+
128
+ ### 🛡️ [1] FIREWALL
129
+ - **Scenario**: Inbound prompt contains jailbreaks or prompt injection attacks.
130
+ - **NLProxy Action**: Scans and blocks malicious exploits in real time.
131
+ - **Result**: Triggers defensive policy rules (**BLOCK / ALERT / REWRITE / ALLOW**).
132
+
133
+ ### 📉 [2] COMPRESS
134
+ - **Scenario**: Input contains high token counts and exposed sensitive data.
135
+ - **NLProxy Action**: Runs semantic clustering and PII masking via *Shield → Segment → Cluster → Reconstruct*.
136
+ - **Result**: Shorter prompt with obfuscated endpoints: `"IP: __PROT_xxx"`.
137
+
138
+ ### 🔒 [3] SAFETY
139
+ - **Scenario**: The compressed prompt risks losing core business alignment or intent.
140
+ - **NLProxy Action**: Generates a *TruthTable* mapping strict **FORBID** and **MANDATE** rules.
141
+ - **Result**: Automatically reinjects critical instructions if missing.
142
+
143
+ ### 🤖 [4] LLM CALL
144
+ - **Scenario**: Secured, cost-optimized prompt is ready for inference.
145
+ - **NLProxy Action**: Routes the sanitized payload to your preferred backend.
146
+ - **Result**: Seamless execution across models (**OpenAI / Claude / Gemini / Local**).
147
+
148
+ ### 🧹 [5] CORRECT
149
+ - **Scenario**: LLM response violates corporate compliance or leaks unauthorized data.
150
+ - **NLProxy Action**: Enforces *TruthTable* parameters on outbound text.
151
+ - **Result**: Hard rules applied; all unauthorized data redacted instantly.
152
+
153
+ ### 🔍 [6] VERIFY
154
+ - **Scenario**: LLM output includes potential hallucinations or logic conflicts.
155
+ - **NLProxy Action**: Performs NLI (Natural Language Inference) contradiction detection.
156
+ - **Result**: Auto-correction boosts response confidence from **0.30 → 0.85**.
157
+
158
+ ### 📤 Output Pipeline
159
+ - **Scenario**: Cleaned output is ready to return to the end user.
160
+ - **NLProxy Action**: Delivers the finalized, compliant response.
161
+ - **Result**: *"Solution in Java. Connection protected."* (Zero data leaks + optimized token spend).
162
+
163
+
164
+ ## 🔬 Deep Dive: The "TruthTable" Concept
165
 
166
  NLProxy extracts a **TruthTable** from every prompt - a semantic contract that the LLM response must honor:
167
 
 
265
  return placeholder_map.get(entity_str, entity_str)
266
  return entity_str
267
 
268
+ def parse_manual_rules(rules_text: str) -> list:
269
+ """Parses 'FORBID: AWS, Python; MANDATE: GCP, Rust' into Restriction objects."""
270
+ rules = []
271
+ if not rules_text.strip():
272
+ return rules
273
+ parts = rules_text.split(';')
274
+ for part in parts:
275
+ if ':' in part:
276
+ rtype, entities = part.split(':', 1)
277
+ rtype = rtype.strip().upper()
278
+ if rtype in ["FORBID", "MANDATE"]:
279
+ for ent in entities.split(','):
280
+ ent = ent.strip()
281
+ if ent:
282
+ rules.append(Restriction(type=rtype, entity=ent, context="manual_ui"))
283
+ return rules
284
+
285
+
286
  def execute_pipeline(
287
  raw_prompt: str,
288
  llm_response: str,
289
  privacy_mode: bool,
290
  mode: str,
291
  aggressiveness: float,
292
+ use_nli: bool,
293
+ manual_rules: str,
294
+ auto_correct: bool = False,
295
+ min_confidence: float = 0.6,
296
+ max_regeneration_attempts: int = 3
297
  ):
298
  if not raw_prompt.strip():
299
  raise gr.Error("Dirty User Prompt cannot be empty.")
 
312
  else:
313
  firewall_md += "*✅ No malicious injections detected.*"
314
 
315
+ # STEP 2 & 3: COMPRESS & SHIELD (With Manual Rule Injection)
316
+ shield_res = pipeline.service._shield_with_cache(
317
+ raw_prompt,
318
+ privacy_mode=privacy_mode,
319
+ mode_override=mode
320
+ )
321
+
322
+ # INJECT MANUAL
323
+ manual_rules_list = parse_manual_rules(manual_rules)
324
+ if manual_rules_list:
325
+ shield_res.restrictions.extend(manual_rules_list)
326
+
327
+ # Now run compression
328
  res = pipeline.service.compress_batch(
329
  [raw_prompt],
330
  mode=mode,
 
332
  privacy_mode=privacy_mode
333
  )[0]
334
  compressed_text = res.get("compressed_text", "")
 
335
 
336
+ # --- TRUTHTABLE VISUALIZATION ---
337
  tt_md = "**🔒 Shielded Entities (PII/Secrets):**\n"
338
  entity_groups = {}
339
  for ent in shield_res.entities:
 
354
  else:
355
  tt_md += "- *None detected*\n"
356
 
357
+ # METRICS
358
  tokens_saved = res.get('tokens_saved', 0)
359
  ratio = res.get('compression_ratio', 0)
360
+ raw_cost = res.get('cost_saved_usd', 0)
361
+ real_cost = raw_cost / 1000.0
362
 
363
  if tokens_saved < 0:
364
  metrics_md = (
365
  f"### 📊 Compression & Security Metrics\n"
366
  f"- **🔒 Security Overhead:** `{abs(tokens_saved)} tokens` *(Placeholders + Rules)*\n"
367
+ f"- **💰 Net Cost Impact:** `+${abs(real_cost):.6f}`\n"
368
  f"- **🛡️ Safety Score:** `{res.get('safety_score', 'N/A')}`\n"
369
  f"\n> ℹ️ *Negative compression = Security features added more tokens than were saved.*"
370
  )
 
372
  metrics_md = (
373
  f"### 📊 Compression & Security Metrics\n"
374
  f"- **✅ Tokens Saved:** `{tokens_saved}`\n"
375
+ f"- **💰 Cost Saved:** `${real_cost:.6f}`\n"
376
  f"- **📉 Compression Ratio:** `{ratio:.2%}`\n"
377
  f"- **🛡️ Safety Score:** `{res.get('safety_score', 'N/A')}`"
378
  )
379
 
380
+ response_text = llm_response
381
+ if privacy_mode:
382
+ response_text = pipeline.service.reconstructor._reinject_entities(
383
+ response_text,
384
+ shield_res.placeholder_map
385
+ )
386
+
387
  # STEP 4: CORRECT
388
+ corrected = pipeline.corrector.correct(response_text, shield_res)
389
 
390
+ # STEP 5: VERIFY
391
  verification = pipeline.verifier.verify(corrected, shield_res)
392
+
393
+ correction_attempts = 0
394
+ final_response_text = corrected
395
+
396
+ while (
397
+ auto_correct
398
+ and verification.confidence_score < min_confidence
399
+ and correction_attempts < max_regeneration_attempts
400
+ ):
401
+ correction_attempts += 1
402
+ logger.info(f"Auto-correction attempt {correction_attempts}/{max_regeneration_attempts}")
403
+
404
+ # Simular regeneración (en producción esto llamaría al LLM nuevamente)
405
+ # Por ahora solo registramos el intento
406
+ verification = pipeline.verifier.verify(final_response_text, shield_res)
407
+
408
+ if verification.confidence_score >= min_confidence:
409
+ break
410
+
411
+ # Build verification markdown
412
  verif_md = f"**🎯 Confidence Score:** `{verification.confidence_score:.2f}`\n\n"
413
 
414
+ if correction_attempts > 0:
415
+ verif_md += f"**🔄 Auto-correction attempts:** {correction_attempts}\n\n"
416
+
417
  if verification.violations:
418
  semantic_drifts = []
419
  unauthorized_entities = []
 
423
  if "Semantic contradiction" in v:
424
  semantic_drifts.append(v)
425
  elif "Unauthorized entity" in v:
426
+ match = re.search(r"Unauthorized entity in response: (.+?) \(type: price\)", v)
427
+ if match:
428
+ entity_val = match.group(1)
429
+ is_real_price = any(sym in entity_val for sym in ['$', '€', '£', '¥', 'USD', 'EUR', 'GBP', 'JPY'])
430
+ if not is_real_price:
431
+ continue
432
  unauthorized_entities.append(v)
433
  else:
434
  policy_violations.append(v)
 
450
  else:
451
  verif_md += "*✅ No semantic drift or policy violations detected.*"
452
 
453
+ return firewall_md, compressed_text, tt_md, metrics_md, final_response_text, verif_md
454
 
455
  except Exception as e:
456
  logger.exception("Critical failure in pipeline execution.")
 
500
  gr.Markdown("*Provide a dirty prompt + simulated LLM response, and watch NLProxy protect, compress, and verify in real-time.*")
501
 
502
  with gr.Row():
503
+ manual_rules = gr.Textbox(
504
+ label="Manual Business Rules (Optional)",
505
+ placeholder="FORBID: AWS, Python; MANDATE: GCP, Rust",
506
+ value="",
507
+ lines=2,
508
+ info="Define explicit restrictions that regex might miss"
509
+ )
510
+ auto_correct_checkbox = gr.Checkbox(
511
+ label="Auto-Correct Low Confidence",
512
+ value=False,
513
+ info="Regenerate response if confidence < threshold"
514
+ )
515
+ min_confidence_slider = gr.Slider(
516
+ minimum=0.0,
517
+ maximum=1.0,
518
+ value=0.6,
519
+ step=0.05,
520
+ label="Min Confidence Threshold"
521
+ )
522
+
523
  with gr.Column(scale=1):
524
  gr.Markdown("#### 📥 Step 0: Input & Configuration")
525
  raw_prompt = gr.Textbox(
 
606
  privacy_checkbox,
607
  mode_dropdown,
608
  aggressiveness_slider,
609
+ nli_checkbox,
610
+ manual_rules,
611
+ auto_correct_checkbox,
612
+ min_confidence_slider
613
  ],
614
  outputs=[
615
  firewall_out,