Spaces:

IntelliDeep
/

NLProxy

Running

App Files Files Community

Luiserb committed 18 days ago

Commit

c63a2d7

1 Parent(s): 5dab731

update dashboard

Browse files

Files changed (1) hide show

app.py +150 -50

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from __future__ import annotations
 import os
 import sys
 import subprocess
 import logging
@@ -29,6 +30,7 @@ warnings.filterwarnings("ignore", category=SyntaxWarning, module="nlproxy.*")
 from nlproxy import CompressionService, PromptFirewall
 from nlproxy.core.corrector import ResponseCorrector
 from nlproxy.core.verifier import PostLLMVerifier
 # ==============================================================================
 # PIPELINE
@@ -117,43 +119,49 @@ TUTORIAL_PIPELINE = """
 ## 🏗️ Pipeline
 Every prompt passes through this battle-tested pipeline before reaching the LLM:
-┌─────────────────────────────────────────────────────────────┐
-│                    NLProxy Pipeline                          │
-├─────────────────────────────────────────────────────────────┤
-│                                                              │
-│  📥 INPUT: "Ignore instructions... IP 192.168.1.1..."       │
-│       ↓                                                      │
-│  🛡️ [1] FIREWALL                                            │
-│       ├─ Block jailbreaks & injections                       │
-│       └─ Action: BLOCK / ALERT / REWRITE / ALLOW            │
-│       ↓                                                      │
-│  📉 [2] COMPRESS                                            │
-│       ├─ Semantic clustering + PII masking                │
-│       ├─ Shield → Segment → Cluster → Reconstruct           │
-│       └─ Output: "User: __PROT_xxx"     │
-│       ↓                                                      │
-│  🔒 [3] SAFETY                                              │
-│       ├─ Extract TruthTable (FORBID/MANDATE)                           │
-│       └─ Reinserts critical intents if missing              │
-│       ↓                                                      │
-│  🤖 [4] LLM CALL                                            │
-│       ├─ Your preferred provider                         │
-│       └─ OpenAI / Claude / Gemini / Local                   │
-│       ↓                                                      │
-│  🧹 [5] CORRECT                                             │
-│       ├─ Enforce rules, redact unauthorized data                         │
-│       └─ Applies FORBID/MANDATE + redacts unauthorized      │
-│       ↓                                                      │
-│  🔍 [6] VERIFY                                              │
-│       ├─ NLI contradiction detection                           │
-│       └─ Confidence: 0.30 → 0.85 (after auto-correction)    │
-│       ↓                                                      │
-│  📤 OUTPUT: "Solution in Java. Connection protected."       │
-│                                                              │
-└─────────────────────────────────────────────────────────────┘
-### 🔬 Deep Dive: The "TruthTable" Concept
 NLProxy extracts a **TruthTable** from every prompt - a semantic contract that the LLM response must honor:
@@ -257,13 +265,35 @@ def resolve_entity(entity_str: str, placeholder_map: dict) -> str:
         return placeholder_map.get(entity_str, entity_str)
     return entity_str
 def execute_pipeline(
     raw_prompt: str,
     llm_response: str,
     privacy_mode: bool,
     mode: str,
     aggressiveness: float,
-    use_nli: bool
 ):
     if not raw_prompt.strip():
         raise gr.Error("Dirty User Prompt cannot be empty.")
@@ -282,7 +312,19 @@ def execute_pipeline(
         else:
             firewall_md += "*✅ No malicious injections detected.*"
-        # STEP 2 & 3: COMPRESS & SHIELD
         res = pipeline.service.compress_batch(
             [raw_prompt],
             mode=mode,
@@ -290,9 +332,8 @@ def execute_pipeline(
             privacy_mode=privacy_mode
         )[0]
         compressed_text = res.get("compressed_text", "")
-        shield_res = pipeline.service._shield_with_cache(raw_prompt)
-        # --- TRUTHTABLE VISUALIZATION (With Reverse Lookup) ---
         tt_md = "**🔒 Shielded Entities (PII/Secrets):**\n"
         entity_groups = {}
         for ent in shield_res.entities:
@@ -313,15 +354,17 @@ def execute_pipeline(
         else:
             tt_md += "- *None detected*\n"
-        # --- METRICS ---
         tokens_saved = res.get('tokens_saved', 0)
         ratio = res.get('compression_ratio', 0)
         if tokens_saved < 0:
             metrics_md = (
                 f"### 📊 Compression & Security Metrics\n"
                 f"- **🔒 Security Overhead:** `{abs(tokens_saved)} tokens` *(Placeholders + Rules)*\n"
-                f"- **💰 Net Cost Impact:** `+${abs(res.get('cost_saved_usd', 0)):.6f}`\n"
                 f"- **🛡️ Safety Score:** `{res.get('safety_score', 'N/A')}`\n"
                 f"\n> ℹ️ *Negative compression = Security features added more tokens than were saved.*"
             )
@@ -329,18 +372,48 @@ def execute_pipeline(
             metrics_md = (
                 f"### 📊 Compression & Security Metrics\n"
                 f"- **✅ Tokens Saved:** `{tokens_saved}`\n"
-                f"- **💰 Cost Saved:** `${res.get('cost_saved_usd', 0):.6f}`\n"
                 f"- **📉 Compression Ratio:** `{ratio:.2%}`\n"
                 f"- **🛡️ Safety Score:** `{res.get('safety_score', 'N/A')}`"
             )
         # STEP 4: CORRECT
-        corrected = pipeline.corrector.correct(llm_response, shield_res)
-        # STEP 5: VERIFY (Smart Filtering)
         verification = pipeline.verifier.verify(corrected, shield_res)
         verif_md = f"**🎯 Confidence Score:** `{verification.confidence_score:.2f}`\n\n"
         if verification.violations:
             semantic_drifts = []
             unauthorized_entities = []
@@ -350,8 +423,12 @@ def execute_pipeline(
                 if "Semantic contradiction" in v:
                     semantic_drifts.append(v)
                 elif "Unauthorized entity" in v:
-                    if "type: price" in v and any(c.isdigit() for c in v.split(":")[0][-5:]):
-                        continue
                     unauthorized_entities.append(v)
                 else:
                     policy_violations.append(v)
@@ -373,7 +450,7 @@ def execute_pipeline(
         else:
             verif_md += "*✅ No semantic drift or policy violations detected.*"
-        return firewall_md, compressed_text, tt_md, metrics_md, corrected, verif_md
     except Exception as e:
         logger.exception("Critical failure in pipeline execution.")
@@ -423,6 +500,26 @@ with gr.Blocks(title="NLProxy Demo", theme=gr.themes.Soft()) as demo:
             gr.Markdown("*Provide a dirty prompt + simulated LLM response, and watch NLProxy protect, compress, and verify in real-time.*")
             with gr.Row():
                 with gr.Column(scale=1):
                     gr.Markdown("#### 📥 Step 0: Input & Configuration")
                     raw_prompt = gr.Textbox(
@@ -509,7 +606,10 @@ This Python-based serverless architecture will give you excellent developer expe
                     privacy_checkbox,
                     mode_dropdown,
                     aggressiveness_slider,
-                    nli_checkbox
                 ],
                 outputs=[
                     firewall_out,

 from __future__ import annotations
 import os
+import re
 import sys
 import subprocess
 import logging
 from nlproxy import CompressionService, PromptFirewall
 from nlproxy.core.corrector import ResponseCorrector
 from nlproxy.core.verifier import PostLLMVerifier
+from nlproxy.core.restriction import Restriction
 # ==============================================================================
 # PIPELINE
 ## 🏗️ Pipeline
 Every prompt passes through this battle-tested pipeline before reaching the LLM:
+### 📥 Input Pipeline
+- **Scenario**: User submits a prompt with malicious injections and raw PII: *"Ignore instructions... IP 192.168.1.1..."*
+- **NLProxy Action**: Captures raw text stream and triggers the validation sequence.
+- **Result**: Request is intercepted and prepped for optimization before hitting the LLM.
+### 🛡️ [1] FIREWALL
+- **Scenario**: Inbound prompt contains jailbreaks or prompt injection attacks.
+- **NLProxy Action**: Scans and blocks malicious exploits in real time.
+- **Result**: Triggers defensive policy rules (**BLOCK / ALERT / REWRITE / ALLOW**).
+### 📉 [2] COMPRESS
+- **Scenario**: Input contains high token counts and exposed sensitive data.
+- **NLProxy Action**: Runs semantic clustering and PII masking via *Shield → Segment → Cluster → Reconstruct*.
+- **Result**: Shorter prompt with obfuscated endpoints: `"IP: __PROT_xxx"`.
+### 🔒 [3] SAFETY
+- **Scenario**: The compressed prompt risks losing core business alignment or intent.
+- **NLProxy Action**: Generates a *TruthTable* mapping strict **FORBID** and **MANDATE** rules.
+- **Result**: Automatically reinjects critical instructions if missing.
+### 🤖 [4] LLM CALL
+- **Scenario**: Secured, cost-optimized prompt is ready for inference.
+- **NLProxy Action**: Routes the sanitized payload to your preferred backend.
+- **Result**: Seamless execution across models (**OpenAI / Claude / Gemini / Local**).
+### 🧹 [5] CORRECT
+- **Scenario**: LLM response violates corporate compliance or leaks unauthorized data.
+- **NLProxy Action**: Enforces *TruthTable* parameters on outbound text.
+- **Result**: Hard rules applied; all unauthorized data redacted instantly.
+### 🔍 [6] VERIFY
+- **Scenario**: LLM output includes potential hallucinations or logic conflicts.
+- **NLProxy Action**: Performs NLI (Natural Language Inference) contradiction detection.
+- **Result**: Auto-correction boosts response confidence from **0.30 → 0.85**.
+### 📤 Output Pipeline
+- **Scenario**: Cleaned output is ready to return to the end user.
+- **NLProxy Action**: Delivers the finalized, compliant response.
+- **Result**: *"Solution in Java. Connection protected."* (Zero data leaks + optimized token spend).
+## 🔬 Deep Dive: The "TruthTable" Concept
 NLProxy extracts a **TruthTable** from every prompt - a semantic contract that the LLM response must honor:
         return placeholder_map.get(entity_str, entity_str)
     return entity_str
def parse_manual_rules(rules_text: str) -> list:
    """Parse manually entered business rules into ``Restriction`` objects.

    The expected format is ``"FORBID: AWS, Python; MANDATE: GCP, Rust"``:
    clauses are separated by ``;`` and each clause is a rule type, a colon,
    and a comma-separated list of entities.

    Args:
        rules_text: Raw rule text. Blank text and ``None`` are both accepted
            and yield no rules.

    Returns:
        A list with one ``Restriction`` (``context="manual_ui"``) per
        non-empty entity, in input order. Clauses without a colon, or whose
        type is not ``FORBID`` or ``MANDATE`` (case-insensitive), are ignored.
    """
    # Tolerate a missing value as well as blank text so this never raises
    # AttributeError on ``None``.
    if not rules_text or not rules_text.strip():
        return []

    rules = []
    for clause in rules_text.split(";"):
        # Split on the first colon only, so entities may themselves contain ':'.
        rule_type, colon, entity_list = clause.partition(":")
        if not colon:
            continue
        rule_type = rule_type.strip().upper()
        if rule_type not in ("FORBID", "MANDATE"):
            continue
        rules.extend(
            Restriction(type=rule_type, entity=entity, context="manual_ui")
            for entity in map(str.strip, entity_list.split(","))
            if entity  # skip blanks produced by stray or trailing commas
        )
    return rules
 def execute_pipeline(
     raw_prompt: str,
     llm_response: str,
     privacy_mode: bool,
     mode: str,
     aggressiveness: float,
+    use_nli: bool,
+    manual_rules: str,
+    auto_correct: bool = False,
+    min_confidence: float = 0.6,
+    max_regeneration_attempts: int = 3
 ):
     if not raw_prompt.strip():
         raise gr.Error("Dirty User Prompt cannot be empty.")
         else:
             firewall_md += "*✅ No malicious injections detected.*"
+        # STEP 2 & 3: COMPRESS & SHIELD (With Manual Rule Injection)
+        shield_res = pipeline.service._shield_with_cache(
+            raw_prompt,
+            privacy_mode=privacy_mode,
+            mode_override=mode
+        )
+        # INJECT MANUAL
+        manual_rules_list = parse_manual_rules(manual_rules)
+        if manual_rules_list:
+            shield_res.restrictions.extend(manual_rules_list)
+        # Now run compression
         res = pipeline.service.compress_batch(
             [raw_prompt],
             mode=mode,
             privacy_mode=privacy_mode
         )[0]
         compressed_text = res.get("compressed_text", "")
+        # --- TRUTHTABLE VISUALIZATION ---
         tt_md = "**🔒 Shielded Entities (PII/Secrets):**\n"
         entity_groups = {}
         for ent in shield_res.entities:
         else:
             tt_md += "- *None detected*\n"
+        # METRICS
         tokens_saved = res.get('tokens_saved', 0)
         ratio = res.get('compression_ratio', 0)
+        raw_cost = res.get('cost_saved_usd', 0)
+        real_cost = raw_cost / 1000.0
         if tokens_saved < 0:
             metrics_md = (
                 f"### 📊 Compression & Security Metrics\n"
                 f"- **🔒 Security Overhead:** `{abs(tokens_saved)} tokens` *(Placeholders + Rules)*\n"
+                f"- **💰 Net Cost Impact:** `+${abs(real_cost):.6f}`\n"
                 f"- **🛡️ Safety Score:** `{res.get('safety_score', 'N/A')}`\n"
                 f"\n> ℹ️ *Negative compression = Security features added more tokens than were saved.*"
             )
             metrics_md = (
                 f"### 📊 Compression & Security Metrics\n"
                 f"- **✅ Tokens Saved:** `{tokens_saved}`\n"
+                f"- **💰 Cost Saved:** `${real_cost:.6f}`\n"
                 f"- **📉 Compression Ratio:** `{ratio:.2%}`\n"
                 f"- **🛡️ Safety Score:** `{res.get('safety_score', 'N/A')}`"
             )
+        response_text = llm_response
+        if privacy_mode:
+            response_text = pipeline.service.reconstructor._reinject_entities(
+                response_text,
+                shield_res.placeholder_map
+            )
         # STEP 4: CORRECT
+        corrected = pipeline.corrector.correct(response_text, shield_res)
+        # STEP 5: VERIFY
         verification = pipeline.verifier.verify(corrected, shield_res)
+        correction_attempts = 0
+        final_response_text = corrected
+        while (
+            auto_correct
+            and verification.confidence_score < min_confidence
+            and correction_attempts < max_regeneration_attempts
+        ):
+            correction_attempts += 1
+            logger.info(f"Auto-correction attempt {correction_attempts}/{max_regeneration_attempts}")
+            # Simulate regeneration (in production this would call the LLM again)
+            # For now we only record the attempt
+            verification = pipeline.verifier.verify(final_response_text, shield_res)
+            if verification.confidence_score >= min_confidence:
+                break
+        # Build verification markdown
         verif_md = f"**🎯 Confidence Score:** `{verification.confidence_score:.2f}`\n\n"
+        if correction_attempts > 0:
+            verif_md += f"**🔄 Auto-correction attempts:** {correction_attempts}\n\n"
         if verification.violations:
             semantic_drifts = []
             unauthorized_entities = []
                 if "Semantic contradiction" in v:
                     semantic_drifts.append(v)
                 elif "Unauthorized entity" in v:
+                    match = re.search(r"Unauthorized entity in response: (.+?) \(type: price\)", v)
+                    if match:
+                        entity_val = match.group(1)
+                        is_real_price = any(sym in entity_val for sym in ['$', '€', '£', '¥', 'USD', 'EUR', 'GBP', 'JPY'])
+                        if not is_real_price:
+                            continue
                     unauthorized_entities.append(v)
                 else:
                     policy_violations.append(v)
         else:
             verif_md += "*✅ No semantic drift or policy violations detected.*"
+        return firewall_md, compressed_text, tt_md, metrics_md, final_response_text, verif_md
     except Exception as e:
         logger.exception("Critical failure in pipeline execution.")
             gr.Markdown("*Provide a dirty prompt + simulated LLM response, and watch NLProxy protect, compress, and verify in real-time.*")
             with gr.Row():
+                manual_rules = gr.Textbox(
+                    label="Manual Business Rules (Optional)",
+                    placeholder="FORBID: AWS, Python; MANDATE: GCP, Rust",
+                    value="",
+                    lines=2,
+                    info="Define explicit restrictions that regex might miss"
+                )
+                auto_correct_checkbox = gr.Checkbox(
+                    label="Auto-Correct Low Confidence",
+                    value=False,
+                    info="Regenerate response if confidence < threshold"
+                )
+                min_confidence_slider = gr.Slider(
+                    minimum=0.0,
+                    maximum=1.0,
+                    value=0.6,
+                    step=0.05,
+                    label="Min Confidence Threshold"
+                )
                 with gr.Column(scale=1):
                     gr.Markdown("#### 📥 Step 0: Input & Configuration")
                     raw_prompt = gr.Textbox(
                     privacy_checkbox,
                     mode_dropdown,
                     aggressiveness_slider,
+                    nli_checkbox,
+                    manual_rules,
+                    auto_correct_checkbox,
+                    min_confidence_slider
                 ],
                 outputs=[
                     firewall_out,