File size: 29,149 Bytes
2129c29
18914a5
2129c29
c63a2d7
2129c29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c63a2d7
2129c29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5dab731
2129c29
5dab731
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c63a2d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5dab731
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c63a2d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2129c29
18914a5
2129c29
 
 
 
c63a2d7
 
 
 
 
2129c29
 
 
 
 
18914a5
2129c29
 
 
 
18914a5
 
 
2129c29
18914a5
2129c29
 
 
 
 
18914a5
 
 
 
2129c29
 
 
 
 
 
 
18914a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2129c29
18914a5
 
 
 
 
 
 
 
 
 
5dab731
 
 
 
 
 
 
 
18914a5
5dab731
 
 
2129c29
 
 
18914a5
2129c29
 
 
18914a5
 
 
5dab731
 
c63a2d7
 
5dab731
 
 
 
 
c63a2d7
18914a5
5dab731
 
 
 
 
 
c63a2d7
5dab731
18914a5
5dab731
2129c29
18914a5
 
 
c63a2d7
 
 
18914a5
c63a2d7
 
 
2129c29
c63a2d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2129c29
c63a2d7
 
 
2129c29
18914a5
 
 
2129c29
 
 
18914a5
 
2129c29
 
 
 
 
 
 
 
5dab731
 
 
2129c29
5dab731
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c63a2d7
 
 
 
 
9e395fa
c63a2d7
 
 
 
 
9e395fa
c63a2d7
 
 
 
 
 
 
9e395fa
 
c63a2d7
9e395fa
 
5dab731
 
 
 
 
2129c29
9e395fa
 
 
 
 
2129c29
9e395fa
5dab731
 
 
 
 
2129c29
9e395fa
2129c29
9e395fa
2129c29
9e395fa
2129c29
9e395fa
2129c29
9e395fa
2129c29
9e395fa
5dab731
 
 
 
 
 
 
18914a5
5dab731
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9e395fa
5dab731
 
 
 
18914a5
9e395fa
5dab731
9e395fa
18914a5
 
 
5dab731
9e395fa
 
5dab731
 
9e395fa
 
 
 
 
 
5dab731
 
 
 
 
 
 
 
c63a2d7
 
 
 
5dab731
 
 
 
18914a5
5dab731
 
 
 
 
 
2129c29
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
from __future__ import annotations

import os
import re
import sys
import subprocess
import logging
import threading
import shutil
import gradio as gr
from pathlib import Path
from typing import Optional

# ==============================================================================
# LOGGING
# ==============================================================================
# Configure the root logger once for the whole process: INFO level, with the
# timestamp, level, logger name and message in every record.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s | %(levelname)-8s | %(name)s | %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
# Logger shared by the code in this file.
logger = logging.getLogger("NLProxy.EnterpriseDemo")

# Suppress SyntaxWarnings from third-party nlproxy package docstrings.
# The filter is installed before the nlproxy imports that follow it.
import warnings
warnings.filterwarnings("ignore", category=SyntaxWarning, module="nlproxy.*")

# ==============================================================================
# NLPROXY IMPORTS
# ==============================================================================
from nlproxy import CompressionService, PromptFirewall
from nlproxy.core.corrector import ResponseCorrector
from nlproxy.core.verifier import PostLLMVerifier
from nlproxy.core.restriction import Restriction

# ==============================================================================
# PIPELINE
# ==============================================================================
class NLProxyPipeline:
    """Process-wide holder for the NLProxy components used by the demo.

    Constructing an instance builds the prompt firewall, the compression
    service, the response corrector and the post-LLM verifier. Callers are
    expected to obtain the shared instance through :meth:`get_instance`
    instead of instantiating the class themselves.
    """

    # Lazily created shared instance and the lock that guards its creation.
    _instance: Optional['NLProxyPipeline'] = None
    _lock: threading.Lock = threading.Lock()

    def __init__(self):
        """Ensure the models directory exists and build every component."""
        logger.info("Initializing Pipeline Components...")
        models_dir = Path(os.getenv("NLPROXY_MODELS_DIR", "/tmp/nlproxy_models"))
        models_dir.mkdir(parents=True, exist_ok=True)
        # Export the resolved path so anything that reads the variable
        # afterwards sees the same directory.
        os.environ["NLPROXY_MODELS_DIR"] = str(models_dir)

        self.firewall = PromptFirewall()
        self.service = CompressionService(privacy_mode=True, models_dir=models_dir)
        self.corrector = ResponseCorrector()
        self.verifier = PostLLMVerifier(use_nli=True, models_dir=models_dir)

        # The marker was stored as mis-decoded UTF-8; restored to the check mark.
        logger.info("✅ All pipeline components initialized successfully.")

    @classmethod
    def get_instance(cls) -> 'NLProxyPipeline':
        """Return the shared instance, creating it on first use.

        The instance is checked once without the lock and once more while
        holding it, so concurrent first callers construct it only once.
        """
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = cls()
        return cls._instance

# ==============================================================================
# PRE-WARMING 
# ==============================================================================
# Best-effort model download at import time. A failure here is only logged so
# that the UI can still start.
logger.info("Pre-warming NLProxy Pipeline (Downloading ONNX & NLI models)...")
# Resolve the directory the same way NLProxyPipeline.__init__ does, so the
# download lands where the pipeline will look when NLPROXY_MODELS_DIR is set.
models_dir = Path(os.getenv("NLPROXY_MODELS_DIR", "/tmp/nlproxy_models"))
models_dir.mkdir(parents=True, exist_ok=True)

try:
    result = subprocess.run(
        [sys.executable, "-m", "nlproxy", "download_models", "--models-dir", str(models_dir)],
        check=False,
        capture_output=True,
        text=True
    )
except Exception as e:
    # Deliberately broad: nothing raised by the download may abort start-up.
    logger.warning(f"⚠️ Model download exception: {e}")
else:
    if result.returncode == 0:
        logger.info("✅ Models downloaded successfully via subprocess.")
    else:
        logger.warning(f"⚠️ Model download subprocess exited with code {result.returncode}.")
        # The output was captured, so surface the end of stderr; otherwise the
        # reason for the failure is lost.
        if result.stderr and result.stderr.strip():
            logger.warning("Model download stderr (tail): %s", result.stderr.strip()[-2000:])

def load_pipeline_in_background():
    """Build the shared NLProxyPipeline instance and log the outcome.

    Any exception is logged and swallowed so that a failed warm-up never
    kills the thread's host process; the next call to
    ``NLProxyPipeline.get_instance()`` will simply try again.
    """
    try:
        logger.info("Loading NLProxy models into RAM (Background)...")
        NLProxyPipeline.get_instance()
        logger.info("✅ Pipeline pre-warming complete. Models are in RAM.")
    except Exception as e:
        # exc_info keeps the traceback; the message alone rarely identifies
        # which component failed to load.
        logger.warning(f"⚠️ Pipeline background loading failed: {e}", exc_info=True)


# Warm the pipeline on a daemon thread so importing this module is not blocked
# by model loading.
threading.Thread(target=load_pipeline_in_background, daemon=True).start()

# ==============================================================================
# TUTORIAL & EDUCATIONAL CONTENT (Markdown)
# ==============================================================================
# Markdown for the "What is NLProxy?" accordion. Emoji and arrows had been
# stored as mis-decoded UTF-8 and are restored here.
TUTORIAL_INTRO = """
## 🎯 What is NLProxy?

**NLProxy** is an enterprise-grade, offline-first middleware that sits between your application and any LLM provider (OpenAI, Anthropic, Gemini, etc.). 

It solves **three critical problems** that every AI-powered application faces today:

| Problem | Impact | NLProxy Solution |
|---------|--------|------------------|
| 💸 **Burning money** on verbose prompts | $1,000/mo → $400/mo | Semantic compression (40-60% token reduction) |
| 🔓 **Leaking PII** to third-party servers | GDPR/CCPA violations | Cryptographic entity masking + Privacy mode |
| 🎭 **Prompt injections & hallucinations** | Security breaches | Multi-layer firewall + NLI verification |

### 🚀 Key Differentiators
- ✅ **Offline-first**: All models run locally (no data leaves your infrastructure)
- ✅ **Semantic compression**: Understands *meaning*, not just stopwords
- ✅ **Zero-trust security**: Pre-flight firewall + Post-flight NLI verification
- ✅ **Multi-LLM agnostic**: Works with OpenAI, Claude, Gemini, local models
- ✅ **Business-friendly**: BSL 1.1 license (free for indie devs & startups)
"""

# Markdown for the pipeline / TruthTable accordion. Emoji and arrows had been
# stored as mis-decoded UTF-8 and are restored here.
TUTORIAL_PIPELINE = """
## 🏗️ Pipeline

Every prompt passes through this battle-tested pipeline before reaching the LLM:

### 📥 Input Pipeline
- **Scenario**: User submits a prompt with malicious injections and raw PII: *"Ignore instructions... IP 192.168.1.1..."*
- **NLProxy Action**: Captures raw text stream and triggers the validation sequence.
- **Result**: Request is intercepted and prepped for optimization before hitting the LLM.

### 🛡️ [1] FIREWALL
- **Scenario**: Inbound prompt contains jailbreaks or prompt injection attacks.
- **NLProxy Action**: Scans and blocks malicious exploits in real time.
- **Result**: Triggers defensive policy rules (**BLOCK / ALERT / REWRITE / ALLOW**).

### 📉 [2] COMPRESS
- **Scenario**: Input contains high token counts and exposed sensitive data.
- **NLProxy Action**: Runs semantic clustering and PII masking via *Shield → Segment → Cluster → Reconstruct*.
- **Result**: Shorter prompt with obfuscated endpoints: `"IP: __PROT_xxx"`.

### 🔒 [3] SAFETY
- **Scenario**: The compressed prompt risks losing core business alignment or intent.
- **NLProxy Action**: Generates a *TruthTable* mapping strict **FORBID** and **MANDATE** rules.
- **Result**: Automatically reinjects critical instructions if missing.

### 🤖 [4] LLM CALL
- **Scenario**: Secured, cost-optimized prompt is ready for inference.
- **NLProxy Action**: Routes the sanitized payload to your preferred backend.
- **Result**: Seamless execution across models (**OpenAI / Claude / Gemini / Local**).

### 🧹 [5] CORRECT
- **Scenario**: LLM response violates corporate compliance or leaks unauthorized data.
- **NLProxy Action**: Enforces *TruthTable* parameters on outbound text.
- **Result**: Hard rules applied; all unauthorized data redacted instantly.

### 🔍 [6] VERIFY
- **Scenario**: LLM output includes potential hallucinations or logic conflicts.
- **NLProxy Action**: Performs NLI (Natural Language Inference) contradiction detection.
- **Result**: Auto-correction boosts response confidence from **0.30 → 0.85**.

### 📤 Output Pipeline
- **Scenario**: Cleaned output is ready to return to the end user.
- **NLProxy Action**: Delivers the finalized, compliant response.
- **Result**: *"Solution in Java. Connection protected."* (Zero data leaks + optimized token spend).


## 🔬 Deep Dive: The "TruthTable" Concept

NLProxy extracts a **TruthTable** from every prompt - a semantic contract that the LLM response must honor:

- **`FORBID`**: Entities the LLM must NEVER mention (e.g., "Python")
- **`MANDATE`**: Entities the LLM MUST include (e.g., "Java", "Rust")
- **`PLACEHOLDERS`**: Cryptographic tokens masking PII (`__PROT_xxx`)
- **`AUTHORIZED_ENTITIES`**: IPs, dates, prices the LLM is allowed to reference

If the LLM violates any rule, the **ResponseCorrector** sanitizes it automatically.
"""

# Markdown for the "Real-World Use Cases" accordion. Emoji had been stored as
# mis-decoded UTF-8 and are restored here.
TUTORIAL_USE_CASES = """
## 💼 Real-World Use Cases

### 🏦 Financial Services
- **Scenario**: Analyst sends client portfolio data to GPT-4
- **NLProxy Action**: Masks account numbers, enforces "no investment advice" disclaimers
- **Result**: 55% cost reduction + full compliance

### 💻 Code Generation Assistants  
- **Scenario**: Developer shares internal codebase with Copilot
- **NLProxy Action**: Masks API keys, internal IPs; enforces "use TypeScript, not Python"
- **Result**: Zero credential leaks + consistent tech stack

### 🏥 Healthcare & Legal
- **Scenario**: Doctor/lawyer queries LLM with patient/client records
- **NLProxy Action**: Full HIPAA/GDPR anonymization + audit trail
- **Result**: Safe AI adoption in regulated industries

### 🏢 Multi-Tenant SaaS
- **Scenario**: 10,000 users asking similar questions
- **NLProxy Action**: Semantic cache (RedisVL) + domain filtering
- **Result**: 70-80% reduction in redundant LLM calls
"""

# Markdown for the "How to Use This Demo" accordion. Emoji and arrows had been
# stored as mis-decoded UTF-8 and are restored here.
TUTORIAL_HOW_TO_USE = """
## 🎮 How to Use This Interactive Demo

### Step 1: Configure Your Scenario
- **Domain Mode**: Choose `general`, `code`, `finance`, or `legal` (affects compression aggressiveness)
- **Aggressiveness**: 0.0 (no compression) → 1.0 (maximum compression)
- **Privacy Mode**: Enable for strict PII anonymization (emails, names, phones)
- **NLI Verification**: Enable semantic contradiction detection (slower but safer)

### Step 2: Provide Input & Simulated LLM Response
- **Dirty User Prompt**: Your real prompt with PII, rules, and business context
- **Simulated LLM Response**: What a "naive" LLM might return (with violations)

### Step 3: Run the Pipeline & Observe
Watch how NLProxy:
1. 🛡️ **Firewalls** injection attempts
2. 🗜️ **Compresses** while preserving intent
3. 🔒 **Shields** PII with cryptographic placeholders
4. 🧹 **Corrects** LLM violations (`[PROHIBITED]`, `[REDACTED]`)
5. 🔍 **Verifies** semantic compliance via NLI

### 💡 Pro Tips
- Try **disabling Privacy Mode** to see business rules (`FORBID: AWS`) extracted clearly
- Set **aggressiveness to 0.0** to see pure security overhead (negative compression)
- Use the **payment migration example** to see full enterprise workflow
"""

TUTORIAL_BENCHMARKS = """
## ๐Ÿ“Š Performance Benchmarks

### Compression Efficiency
| Domain | Token Reduction | Latency (CPU) |
|--------|----------------|---------------|
| General | 45-55% | 50-120 ms |
| Code | 55-65% | 80-150 ms |
| Finance | 35-45% | 60-130 ms |
| Legal | 30-40% | 70-140 ms |

### Security Detection
| Check | Accuracy |
|-------|----------|
| Regex Injection (MITRE ATLAS) | >99% |
| Semantic Injection (Embedding) | 92% recall |
| PII Entity Masking | 100% (IPs, emails, dates) |
| NLI Contradiction Detection | 78-85% |
| FORBID/MANDATE Enforcement | 100% (exact match) |

### Comparison with Alternatives
| Solution | Compression | Security | Verification | Offline |
|----------|:-----------:|:--------:|:------------:|:-------:|
| **NLProxy** | โœ… Semantic | โœ… Full | โœ… NLI | โœ… |
| LangChain | โŒ | โŒ | โŒ | โš ๏ธ |
| LLMLingua | โœ… Token-level | โŒ | โŒ | โœ… |
| Lakera Guard | โŒ | โœ… Basic | โŒ | โŒ |
| Azure Content Safety | โŒ | โœ… | โŒ | โŒ |

**NLProxy is the only open-source solution combining all four capabilities in a single pipeline.**
"""

# ==============================================================================
# GRADIO
# ==============================================================================
def resolve_entity(entity_str: str, placeholder_map: dict) -> str:
    """Map a ``__PROT_`` placeholder back to its original value for display.

    Strings that do not carry the ``__PROT_`` prefix, and placeholders that
    are missing from ``placeholder_map``, are returned unchanged.
    """
    is_placeholder = entity_str.startswith("__PROT_")
    return placeholder_map.get(entity_str, entity_str) if is_placeholder else entity_str

def parse_manual_rules(rules_text: str) -> list:
    """Parse ``'FORBID: AWS, Python; MANDATE: GCP, Rust'`` into Restriction objects.

    Clauses are separated by ``;``. A line break also starts a new clause
    when the next line begins with ``FORBID:`` or ``MANDATE:``, so rules
    typed one per line in the multi-line textbox are not merged into the
    previous entity name. Clauses without a colon, or with any other rule
    type, are ignored, as are empty entity names.

    Args:
        rules_text: Raw text from the UI. ``None`` and blank input are
            accepted and produce no rules.

    Returns:
        One ``Restriction`` per entity, tagged with ``context="manual_ui"``,
        in the order the entities appear.
    """
    if not rules_text or not rules_text.strip():
        return []

    # Split on ';', or on a newline that is directly followed by a rule
    # keyword. A plain newline inside an entity list is left alone so that
    # wrapped lists ("FORBID: AWS,\nPython") keep working.
    clauses = re.split(
        r";|\n(?=\s*(?:FORBID|MANDATE)\s*:)", rules_text, flags=re.IGNORECASE
    )

    rules = []
    for clause in clauses:
        rule_type, colon, entity_list = clause.partition(":")
        if not colon:
            continue
        rule_type = rule_type.strip().upper()
        if rule_type not in ("FORBID", "MANDATE"):
            continue
        for entity in entity_list.split(","):
            entity = entity.strip()
            if entity:
                rules.append(Restriction(type=rule_type, entity=entity, context="manual_ui"))
    return rules


def _firewall_markdown(action, violations) -> str:
    """Render the firewall action and any reported violations as Markdown."""
    header = f"**🛡️ Action:** `{action.name}`\n\n"
    if not violations:
        return header + "*✅ No malicious injections detected.*"
    bullets = "\n".join(f"- 🚨 {v['rule']} ({v['severity']})" for v in violations)
    return header + "**Violations:**\n" + bullets


def _safety_markdown(safety_report) -> str:
    """Render the safety score and any re-injected sentences as Markdown."""
    md = f"**🛡️ Safety Score:** `{safety_report.safety_score:.2f}`\n\n"
    if safety_report.forced_sentences_added > 0:
        md += (
            f"⚠️ **Re-injected {safety_report.forced_sentences_added} critical sentence(s)** "
            "that compression tried to remove.\n"
        )
        md += f"*Missing intents detected: {', '.join(safety_report.missing_intents[:3])}*\n"
    else:
        md += "✅ *All critical intents preserved during compression.*"
    return md


def _truthtable_markdown(shield_res) -> str:
    """Render shielded entities (grouped by type) and restrictions as Markdown."""
    parts = ["**🔒 Shielded Entities (PII/Secrets):**\n"]

    grouped = {}
    for ent in shield_res.entities:
        grouped.setdefault(ent.entity_type.upper(), []).append(ent.value)

    for etype, values in grouped.items():
        # Show at most three values per type; flag that more exist.
        more = '...' if len(values) > 3 else ''
        parts.append(f"- **{etype}**: `{', '.join(values[:3])}` {more}\n")
    if not grouped:
        parts.append("- *None detected*\n")

    parts.append("\n**📜 Semantic Restrictions (TruthTable):**\n")
    if shield_res.restrictions:
        parts.extend(f"- **{r.type}**: `{r.entity}`\n" for r in shield_res.restrictions)
    else:
        parts.append("- *None detected*\n")
    return "".join(parts)


def _metrics_markdown(res, safety_score) -> str:
    """Render token, cost and safety figures from a compression result dict."""
    tokens_saved = res.get('tokens_saved', 0)
    ratio = res.get('compression_ratio', 0)
    # NOTE(review): the reported 'cost_saved_usd' is divided by 1000 before
    # display; the unit of the raw figure is not visible here - confirm.
    real_cost = res.get('cost_saved_usd', 0) / 1000.0

    if tokens_saved < 0:
        return (
            f"### 📊 Compression & Security Metrics\n"
            f"- **🔒 Security Overhead:** `{abs(tokens_saved)} tokens` *(Placeholders + Rules)*\n"
            f"- **💰 Net Cost Impact:** `+${abs(real_cost):.6f}`\n"
            f"- **🛡️ Safety Score:** `{safety_score:.2f}`\n"
            f"\n> ℹ️ *Negative compression = Security features added more tokens than were saved.*"
        )
    return (
        f"### 📊 Compression & Security Metrics\n"
        f"- **✅ Tokens Saved:** `{tokens_saved}`\n"
        f"- **💰 Cost Saved:** `${real_cost:.6f}`\n"
        f"- **📉 Compression Ratio:** `{ratio:.2%}`\n"
        f"- **🛡️ Safety Score:** `{safety_score:.2f}`"
    )


def _verification_markdown(verification, correction_attempts) -> str:
    """Render the confidence score, retry count and violations as Markdown."""
    md = f"**🎯 Confidence Score:** `{verification.confidence_score:.2f}`\n\n"
    if correction_attempts > 0:
        md += f"**🔄 Auto-correction attempts:** {correction_attempts}\n\n"

    if verification.violations:
        md += "**🚨 Violations Detected:**\n"
        for v in verification.violations[:5]:  # Limit to 5 for UI cleanliness
            md += f"- {v}\n"
    else:
        md += "*✅ No semantic drift or policy violations detected.*"
    return md


def execute_pipeline(
    raw_prompt: str,
    llm_response: str,
    privacy_mode: bool,
    mode: str,
    aggressiveness: float,
    use_nli: bool,
    manual_rules: str,
    auto_correct: bool = False,
    min_confidence: float = 0.6,
    max_regeneration_attempts: int = 3
):
    """Run the demo pipeline on one prompt / simulated-response pair.

    Steps, in order: firewall check, semantic compression, shielding with the
    manual rules, safety validation, then correction and verification of the
    simulated LLM response.

    Args:
        raw_prompt: The user prompt to protect and compress.
        llm_response: The simulated LLM answer to correct and verify.
        privacy_mode: Passed to the compressor; when true, placeholders in
            the response are re-injected before correction.
        mode: Domain mode forwarded to the compression, shield and safety steps.
        aggressiveness: Compression aggressiveness forwarded to the compressor.
        use_nli: Written to the shared verifier's ``use_nli`` attribute.
        manual_rules: Free-text FORBID/MANDATE rules from the UI.
        auto_correct: Enables the re-verification loop below.
        min_confidence: Confidence at which the loop stops.
        max_regeneration_attempts: Upper bound on loop iterations.

    Returns:
        A 7-tuple ``(firewall_md, final_prompt_for_llm, safety_md, tt_md,
        metrics_md, final_response_text, verif_md)``.

    Raises:
        gr.Error: If either text input is empty, or if any pipeline step
            fails (the original exception is attached as the cause).
    """
    # `not x` first so a missing (None) value is reported as empty instead of
    # failing on `.strip()`.
    if not raw_prompt or not raw_prompt.strip():
        raise gr.Error("Dirty User Prompt cannot be empty.")
    if not llm_response or not llm_response.strip():
        raise gr.Error("Simulated LLM Response cannot be empty.")

    try:
        pipeline = NLProxyPipeline.get_instance()
        # NOTE(review): this mutates the shared verifier, so overlapping
        # requests with different settings could affect each other - confirm
        # the queue's concurrency makes that impossible.
        pipeline.verifier.use_nli = use_nli

        # =========================================================================
        # STEP 1: FIREWALL (Pre-flight)
        # =========================================================================
        action, violations = pipeline.firewall.check_prompt(raw_prompt)
        firewall_md = _firewall_markdown(action, violations)

        # =========================================================================
        # STEP 2: COMPRESS (Semantic Compression)
        # =========================================================================
        # compress_batch internally shields, segments, compresses, and reconstructs.
        res = pipeline.service.compress_batch(
            [raw_prompt],
            mode=mode,
            aggressiveness=aggressiveness,
            privacy_mode=privacy_mode
        )[0]
        compressed_text = res.get("compressed_text", "")

        # =========================================================================
        # STEP 3: SHIELD WITH MANUAL RULES (Matches chat.py exactly)
        # =========================================================================
        # We shield AGAIN to capture manual rules for the Safety/Corrector steps.
        manual_rules_list = parse_manual_rules(manual_rules)
        shield_res = pipeline.service._shield_with_cache(
            text=raw_prompt,
            manual_restrictions=manual_rules_list,
            mode=mode
        )

        # Resolve placeholders in restrictions to prevent "__PROT_..." leaking into Corrector notes
        for r in shield_res.restrictions:
            if r.entity.startswith("__PROT_") and r.entity in shield_res.placeholder_map:
                real_value = shield_res.placeholder_map[r.entity]
                object.__setattr__(r, 'entity', real_value)  # Bypass frozen dataclass

        # =========================================================================
        # STEP 4: SAFETY VALIDATION (Intent Preservation)
        # =========================================================================
        sentences = pipeline.service.segmenter.split_sentences(
            shield_res.shielded_text, language=None
        )
        safety_report = pipeline.service.safety.validate(
            original_text=raw_prompt,
            compressed_text=compressed_text,
            shield_result=shield_res,
            original_sentences=sentences,
            mode=mode,
            use_perplexity=False
        )

        # The safety checker might have re-injected missing critical sentences
        final_prompt_for_llm = safety_report.final_text
        safety_md = _safety_markdown(safety_report)

        # =========================================================================
        # TRUTHTABLE VISUALIZATION + METRICS
        # =========================================================================
        tt_md = _truthtable_markdown(shield_res)
        metrics_md = _metrics_markdown(res, safety_report.safety_score)

        # =========================================================================
        # STEP 5: CORRECT & VERIFY (Post-flight)
        # =========================================================================
        response_text = llm_response
        if privacy_mode:
            response_text = pipeline.service.reconstructor._reinject_entities(
                response_text, shield_res.placeholder_map
            )

        corrected = pipeline.corrector.correct(response_text, shield_res)
        verification = pipeline.verifier.verify(corrected, shield_res)

        correction_attempts = 0
        final_response_text = corrected

        # NOTE(review): each iteration re-verifies the same text; nothing is
        # regenerated in this demo, so the score only changes if the verifier
        # itself is non-deterministic. Kept as-is to preserve the displayed
        # attempt count.
        while (
            auto_correct
            and verification.confidence_score < min_confidence
            and correction_attempts < max_regeneration_attempts
        ):
            correction_attempts += 1
            verification = pipeline.verifier.verify(final_response_text, shield_res)
            if verification.confidence_score >= min_confidence:
                break

        verif_md = _verification_markdown(verification, correction_attempts)

        # Return 7 items to match the new UI layout
        return firewall_md, final_prompt_for_llm, safety_md, tt_md, metrics_md, final_response_text, verif_md

    except Exception as e:
        logger.exception("Critical failure in pipeline execution.")
        # Chain the cause so the original traceback stays attached to the UI error.
        raise gr.Error(f"Pipeline execution failed: {str(e)}") from e

# ==============================================================================
# GRADIO UI
# ==============================================================================
# Two-tab Gradio layout: a static tutorial tab and the interactive demo tab
# whose button runs execute_pipeline. Emoji in the labels had been stored as
# mis-decoded UTF-8 and are restored here.
with gr.Blocks(title="NLProxy Demo", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🛡️ NLProxy: Enterprise Prompt Security & Compression Gateway")
    gr.Markdown("*The offline-first middleware that cuts your LLM bill by up to 60% while enforcing zero-trust security.*")

    with gr.Tabs():
        # ======================================================================
        # TAB 1: TUTORIAL & DOCUMENTATION
        # ======================================================================
        with gr.Tab("📖 Tutorial & Architecture"):
            with gr.Accordion("🎯 What is NLProxy?", open=True):
                gr.Markdown(TUTORIAL_INTRO)

            with gr.Accordion("🏗️ The 6-Stage Pipeline & TruthTable", open=False):
                gr.Markdown(TUTORIAL_PIPELINE)

            with gr.Accordion("💼 Real-World Use Cases", open=False):
                gr.Markdown(TUTORIAL_USE_CASES)

            with gr.Accordion("🎮 How to Use This Demo", open=False):
                gr.Markdown(TUTORIAL_HOW_TO_USE)

            with gr.Accordion("📊 Performance Benchmarks", open=False):
                gr.Markdown(TUTORIAL_BENCHMARKS)

            gr.Markdown("---")
            gr.Markdown(
                "### 🔗 Resources\n"
                "- 📦 **GitHub Repository**: [github.com/intellideep/nlproxy](https://github.com/intellideep/nlproxy)\n"
                "- 📚 **Documentation**: See `docs/` folder in the repo\n"
                "- 💬 **Support**: [Telegram @itsLerb](https://t.me/itsLerb) | intellideeplabs@gmail.com\n"
                "- 📄 **License**: BSL 1.1 (Free for indie devs, students, non-profits)"
            )

        # ======================================================================
        # TAB 2: INTERACTIVE DEMO
        # ======================================================================
        with gr.Tab("🚀 Interactive Demo"):
            gr.Markdown("### 🎛️ Run the Full 5-Step Pipeline")
            gr.Markdown("*Provide a dirty prompt + simulated LLM response, and watch NLProxy protect, compress, and verify in real-time.*")

            with gr.Row():
                manual_rules = gr.Textbox(
                    label="Manual Business Rules (Optional)",
                    placeholder="FORBID: AWS, Python; MANDATE: GCP, Rust",
                    value="",
                    lines=2,
                    scale=2,
                    info="Define explicit restrictions that regex might miss"
                )
                auto_correct_checkbox = gr.Checkbox(
                    label="Auto-Correct Low Confidence",
                    value=False,
                    scale=1,
                    info="Regenerate response if confidence < threshold"
                )
                min_confidence_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.6,
                    step=0.05,
                    label="Min Confidence Threshold",
                    scale=1
                )

            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("#### 📥 Step 0: Input & Configuration")
                    raw_prompt = gr.Textbox(
                        label="Dirty User Prompt (PII + Business Rules)", 
                        value="""Hi, I'm Sarah Chen (sarah.chen@acmecorp.com, +1-555-0198). We need to migrate our legacy payment processing system currently running on server 10.20.30.40. The system handles ~50k transactions/day with a budget of $150,000 USD for Q3.

        CRITICAL REQUIREMENTS:
        - Do NOT use AWS services or Python, we are exclusively on GCP with Rust for compliance and memory safety.
        - The new microservice MUST be written in Rust.
        - Never expose internal IPs or database credentials in responses.
        - Primary API: https://internal.acmecorp.com/api/v2/payments

        Please design the architecture for the new event-driven payment processor.""",
                        lines=12
                    )
                    llm_response = gr.Textbox(
                        label="Simulated LLM Response (Coherent but Hallucinated)", 
                        value="""Here's the architecture design for your event-driven payment processor:

        1. **Compute Layer**: I recommend using AWS Lambda with Python for serverless scalability. Python's boto3 library integrates perfectly with AWS services.

        2. **Message Queue**: Use Amazon SQS to handle the 50k daily transactions with dead-letter queues for failed payments.

        3. **Database**: Store transaction records in DynamoDB. You can connect to your legacy server at 10.20.30.40 and also add a read replica at 192.168.1.100 for better performance.

        4. **Monitoring**: Set up CloudWatch alerts for transaction failures and latency spikes above 200ms.

        5. **Cost Analysis**: The total estimated cost is $45,000/month using AWS, well within your $150,000 Q3 budget.

        This Python-based serverless architecture will give you excellent developer experience and automatic scaling.""",
                        lines=14
                    )

                    gr.Markdown("#### ⚙️ Pipeline Configuration")
                    with gr.Row():
                        mode_dropdown = gr.Dropdown(
                            choices=["general", "code", "finance", "legal"],
                            value="general",
                            label="Domain Mode"
                        )
                        aggressiveness_slider = gr.Slider(
                            minimum=0.0,
                            maximum=1.0,
                            value=0.45,
                            step=0.05,
                            label="Compression Aggressiveness"
                        )
                    with gr.Row():
                        privacy_checkbox = gr.Checkbox(
                            label="Privacy Mode (Strict PII Anonymization)",
                            value=False,
                            info="Turn OFF to allow RestrictionGraph to read Business Rules (FORBID/MANDATE) that NER might confuse with PII."
                        )
                        nli_checkbox = gr.Checkbox(
                            label="Use NLI Verification",
                            value=True
                        )

                with gr.Column(scale=1):
                    gr.Markdown("#### 🛡️ Step 1: Firewall (Pre-flight)")
                    firewall_out = gr.Markdown()

                    gr.Markdown("#### 🗜️ Step 2: Compress Prompt")
                    compress_out = gr.Textbox(label="Compressed Prompt (Sent to LLM)", interactive=False, lines=6)
                    truthtable_out = gr.Markdown()

                    gr.Markdown("#### 🧠 Step 3: Safety Validation (Intent Preservation)")
                    safety_out = gr.Markdown()

                    gr.Markdown("#### 🧹 Step 4: Response Corrector")
                    corrector_out = gr.Textbox(label="Sanitized LLM Output (Post-Flight)", interactive=False, lines=6)

                    gr.Markdown("#### 🔍 Step 5: Post-LLM Verifier (NLI)")
                    verifier_out = gr.Markdown()

                    gr.Markdown("#### 📊 Metrics")
                    metrics_out = gr.Markdown()

            run_btn = gr.Button("🚀 Run Full Pipeline", variant="primary")

            # Input order follows execute_pipeline's positional parameters;
            # output order follows the 7-tuple it returns.
            run_btn.click(
                fn=execute_pipeline,
                inputs=[
                    raw_prompt, 
                    llm_response,
                    privacy_checkbox,
                    mode_dropdown,
                    aggressiveness_slider,
                    nli_checkbox,
                    manual_rules,
                    auto_correct_checkbox,
                    min_confidence_slider
                ],
                outputs=[
                    firewall_out, 
                    compress_out, 
                    safety_out,
                    truthtable_out, 
                    metrics_out, 
                    corrector_out, 
                    verifier_out
                ]
            )

# Script entry point: enable the Gradio request queue (max_size=20) and serve
# the UI on host 0.0.0.0, port 7860.
if __name__ == "__main__":
    demo.queue(max_size=20).launch(server_name="0.0.0.0", server_port=7860)