Spaces:
Running
Running
| from __future__ import annotations | |
| import os | |
| import re | |
| import sys | |
| import subprocess | |
| import logging | |
| import threading | |
| import shutil | |
| import gradio as gr | |
| from pathlib import Path | |
| from typing import Optional | |
| # ============================================================================== | |
| # LOGGING | |
| # ============================================================================== | |
# Configure root logging once for the whole demo: INFO level, pipe-separated
# columns (timestamp | level | logger name | message).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s | %(levelname)-8s | %(name)s | %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
# Module-wide logger used by every function in this file.
logger = logging.getLogger("NLProxy.EnterpriseDemo")
# Suppress SyntaxWarnings from third-party nlproxy package docstrings
# The filter is installed here, ahead of the nlproxy imports that follow.
import warnings
warnings.filterwarnings("ignore", category=SyntaxWarning, module="nlproxy.*")
| # ============================================================================== | |
| # NLPROXY IMPORTS | |
| # ============================================================================== | |
| from nlproxy import CompressionService, PromptFirewall | |
| from nlproxy.core.corrector import ResponseCorrector | |
| from nlproxy.core.verifier import PostLLMVerifier | |
| from nlproxy.core.restriction import Restriction | |
| # ============================================================================== | |
| # PIPELINE | |
| # ============================================================================== | |
class NLProxyPipeline:
    """Process-wide holder for the NLProxy components used by the demo.

    Use :meth:`get_instance` instead of calling the constructor directly, so
    that the firewall, compression service, corrector and verifier are built
    only once and shared by every request.
    """

    # Lazily created shared instance; stays ``None`` until first requested.
    _instance: Optional['NLProxyPipeline'] = None
    # Serialises the first construction when several threads ask at once.
    _lock: threading.Lock = threading.Lock()

    def __init__(self):
        """Build all pipeline components, resolving the models directory first."""
        logger.info("Initializing Pipeline Components...")
        models_dir = Path(os.getenv("NLPROXY_MODELS_DIR", "/tmp/nlproxy_models"))
        models_dir.mkdir(parents=True, exist_ok=True)
        # Write the resolved path back to the environment so anything reading
        # NLPROXY_MODELS_DIR later sees the same directory.
        os.environ["NLPROXY_MODELS_DIR"] = str(models_dir)
        self.firewall = PromptFirewall()
        self.service = CompressionService(privacy_mode=True, models_dir=models_dir)
        self.corrector = ResponseCorrector()
        self.verifier = PostLLMVerifier(use_nli=True, models_dir=models_dir)
        logger.info("✅ All pipeline components initialized successfully.")

    @classmethod
    def get_instance(cls) -> 'NLProxyPipeline':
        """Return the shared pipeline, constructing it on first use.

        This must be a classmethod: callers invoke it on the class itself
        (``NLProxyPipeline.get_instance()``), which fails with a ``TypeError``
        for the missing ``cls`` argument when the decorator is absent.
        """
        if cls._instance is None:
            with cls._lock:
                # Re-check under the lock: another thread may have finished
                # construction while this one was waiting.
                if cls._instance is None:
                    cls._instance = cls()
        return cls._instance
| # ============================================================================== | |
| # PRE-WARMING | |
| # ============================================================================== | |
# Fetch the model files at import time, before the pipeline is constructed.
logger.info("Pre-warming NLProxy Pipeline (Downloading ONNX & NLI models)...")
# Resolve the directory exactly as NLProxyPipeline.__init__ does; otherwise a
# custom NLPROXY_MODELS_DIR would make the download and the loader disagree.
models_dir = Path(os.getenv("NLPROXY_MODELS_DIR", "/tmp/nlproxy_models"))
models_dir.mkdir(parents=True, exist_ok=True)
try:
    result = subprocess.run(
        [sys.executable, "-m", "nlproxy", "download_models", "--models-dir", str(models_dir)],
        check=False,
        capture_output=True,
        text=True
    )
except Exception as e:
    # Best effort: a failed download must not stop the app from starting.
    logger.warning(f"⚠️ Model download exception: {e}")
else:
    if result.returncode == 0:
        logger.info("✅ Models downloaded successfully via subprocess.")
    else:
        logger.warning(f"⚠️ Model download subprocess exited with code {result.returncode}.")
        # stderr is captured above; surface it so the failure can be diagnosed.
        if result.stderr and result.stderr.strip():
            logger.warning("Model download stderr: %s", result.stderr.strip())
def load_pipeline_in_background():
    """Construct the shared pipeline and log the outcome; never raises.

    Any exception raised while building the pipeline is logged as a warning
    and swallowed, so a failure here does not propagate to the caller.
    """
    try:
        logger.info("Loading NLProxy models into RAM (Background)...")
        # The return value is not needed; the call exists only to trigger
        # construction of the shared instance.
        NLProxyPipeline.get_instance()
        logger.info("✅ Pipeline pre-warming complete. Models are in RAM.")
    except Exception as exc:
        logger.warning(f"⚠️ Pipeline background loading failed: {exc}")
# Run the loader on a daemon thread so the rest of the module (the UI
# definition below) continues without waiting for it.
threading.Thread(target=load_pipeline_in_background, daemon=True).start()
| # ============================================================================== | |
| # TUTORIAL & EDUCATIONAL CONTENT (Markdown) | |
| # ============================================================================== | |
# Markdown rendered inside the "🎯 What is NLProxy?" accordion of the tutorial tab.
TUTORIAL_INTRO = """
## 🎯 What is NLProxy?
**NLProxy** is an enterprise-grade, offline-first middleware that sits between your application and any LLM provider (OpenAI, Anthropic, Gemini, etc.).
It solves **three critical problems** that every AI-powered application faces today:
| Problem | Impact | NLProxy Solution |
|---------|--------|------------------|
| 💸 **Burning money** on verbose prompts | $1,000/mo → $400/mo | Semantic compression (40-60% token reduction) |
| 🔓 **Leaking PII** to third-party servers | GDPR/CCPA violations | Cryptographic entity masking + Privacy mode |
| 🎭 **Prompt injections & hallucinations** | Security breaches | Multi-layer firewall + NLI verification |
### 🚀 Key Differentiators
- ✅ **Offline-first**: All models run locally (no data leaves your infrastructure)
- ✅ **Semantic compression**: Understands *meaning*, not just stopwords
- ✅ **Zero-trust security**: Pre-flight firewall + Post-flight NLI verification
- ✅ **Multi-LLM agnostic**: Works with OpenAI, Claude, Gemini, local models
- ✅ **Business-friendly**: BSL 1.1 license (free for indie devs & startups)
"""
# Markdown rendered inside the "🏗️ The 6-Stage Pipeline & TruthTable" accordion.
TUTORIAL_PIPELINE = """
## 🏗️ Pipeline
Every prompt passes through this battle-tested pipeline before reaching the LLM:
### 📥 Input Pipeline
- **Scenario**: User submits a prompt with malicious injections and raw PII: *"Ignore instructions... IP 192.168.1.1..."*
- **NLProxy Action**: Captures raw text stream and triggers the validation sequence.
- **Result**: Request is intercepted and prepped for optimization before hitting the LLM.
### 🛡️ [1] FIREWALL
- **Scenario**: Inbound prompt contains jailbreaks or prompt injection attacks.
- **NLProxy Action**: Scans and blocks malicious exploits in real time.
- **Result**: Triggers defensive policy rules (**BLOCK / ALERT / REWRITE / ALLOW**).
### 📉 [2] COMPRESS
- **Scenario**: Input contains high token counts and exposed sensitive data.
- **NLProxy Action**: Runs semantic clustering and PII masking via *Shield → Segment → Cluster → Reconstruct*.
- **Result**: Shorter prompt with obfuscated endpoints: `"IP: __PROT_xxx"`.
### 🔒 [3] SAFETY
- **Scenario**: The compressed prompt risks losing core business alignment or intent.
- **NLProxy Action**: Generates a *TruthTable* mapping strict **FORBID** and **MANDATE** rules.
- **Result**: Automatically reinjects critical instructions if missing.
### 🤖 [4] LLM CALL
- **Scenario**: Secured, cost-optimized prompt is ready for inference.
- **NLProxy Action**: Routes the sanitized payload to your preferred backend.
- **Result**: Seamless execution across models (**OpenAI / Claude / Gemini / Local**).
### 🧹 [5] CORRECT
- **Scenario**: LLM response violates corporate compliance or leaks unauthorized data.
- **NLProxy Action**: Enforces *TruthTable* parameters on outbound text.
- **Result**: Hard rules applied; all unauthorized data redacted instantly.
### 🔍 [6] VERIFY
- **Scenario**: LLM output includes potential hallucinations or logic conflicts.
- **NLProxy Action**: Performs NLI (Natural Language Inference) contradiction detection.
- **Result**: Auto-correction boosts response confidence from **0.30 → 0.85**.
### 📤 Output Pipeline
- **Scenario**: Cleaned output is ready to return to the end user.
- **NLProxy Action**: Delivers the finalized, compliant response.
- **Result**: *"Solution in Java. Connection protected."* (Zero data leaks + optimized token spend).
## 🔬 Deep Dive: The "TruthTable" Concept
NLProxy extracts a **TruthTable** from every prompt - a semantic contract that the LLM response must honor:
- **`FORBID`**: Entities the LLM must NEVER mention (e.g., "Python")
- **`MANDATE`**: Entities the LLM MUST include (e.g., "Java", "Rust")
- **`PLACEHOLDERS`**: Cryptographic tokens masking PII (`__PROT_xxx`)
- **`AUTHORIZED_ENTITIES`**: IPs, dates, prices the LLM is allowed to reference
If the LLM violates any rule, the **ResponseCorrector** sanitizes it automatically.
"""
# Markdown rendered inside the "💼 Real-World Use Cases" accordion.
TUTORIAL_USE_CASES = """
## 💼 Real-World Use Cases
### 🏦 Financial Services
- **Scenario**: Analyst sends client portfolio data to GPT-4
- **NLProxy Action**: Masks account numbers, enforces "no investment advice" disclaimers
- **Result**: 55% cost reduction + full compliance
### 💻 Code Generation Assistants
- **Scenario**: Developer shares internal codebase with Copilot
- **NLProxy Action**: Masks API keys, internal IPs; enforces "use TypeScript, not Python"
- **Result**: Zero credential leaks + consistent tech stack
### 🏥 Healthcare & Legal
- **Scenario**: Doctor/lawyer queries LLM with patient/client records
- **NLProxy Action**: Full HIPAA/GDPR anonymization + audit trail
- **Result**: Safe AI adoption in regulated industries
### 🏢 Multi-Tenant SaaS
- **Scenario**: 10,000 users asking similar questions
- **NLProxy Action**: Semantic cache (RedisVL) + domain filtering
- **Result**: 70-80% reduction in redundant LLM calls
"""
# Markdown rendered inside the "🎮 How to Use This Demo" accordion.
TUTORIAL_HOW_TO_USE = """
## 🎮 How to Use This Interactive Demo
### Step 1: Configure Your Scenario
- **Domain Mode**: Choose `general`, `code`, `finance`, or `legal` (affects compression aggressiveness)
- **Aggressiveness**: 0.0 (no compression) → 1.0 (maximum compression)
- **Privacy Mode**: Enable for strict PII anonymization (emails, names, phones)
- **NLI Verification**: Enable semantic contradiction detection (slower but safer)
### Step 2: Provide Input & Simulated LLM Response
- **Dirty User Prompt**: Your real prompt with PII, rules, and business context
- **Simulated LLM Response**: What a "naive" LLM might return (with violations)
### Step 3: Run the Pipeline & Observe
Watch how NLProxy:
1. 🛡️ **Firewalls** injection attempts
2. 🗜️ **Compresses** while preserving intent
3. 🔒 **Shields** PII with cryptographic placeholders
4. 🧹 **Corrects** LLM violations (`[PROHIBITED]`, `[REDACTED]`)
5. 🔍 **Verifies** semantic compliance via NLI
### 💡 Pro Tips
- Try **disabling Privacy Mode** to see business rules (`FORBID: AWS`) extracted clearly
- Set **aggressiveness to 0.0** to see pure security overhead (negative compression)
- Use the **payment migration example** to see full enterprise workflow
"""
# Markdown rendered inside the "📊 Performance Benchmarks" accordion.
# NOTE(review): the figures below are static text; nothing in this file computes them.
TUTORIAL_BENCHMARKS = """
## 📊 Performance Benchmarks
### Compression Efficiency
| Domain | Token Reduction | Latency (CPU) |
|--------|----------------|---------------|
| General | 45-55% | 50-120 ms |
| Code | 55-65% | 80-150 ms |
| Finance | 35-45% | 60-130 ms |
| Legal | 30-40% | 70-140 ms |
### Security Detection
| Check | Accuracy |
|-------|----------|
| Regex Injection (MITRE ATLAS) | >99% |
| Semantic Injection (Embedding) | 92% recall |
| PII Entity Masking | 100% (IPs, emails, dates) |
| NLI Contradiction Detection | 78-85% |
| FORBID/MANDATE Enforcement | 100% (exact match) |
### Comparison with Alternatives
| Solution | Compression | Security | Verification | Offline |
|----------|:-----------:|:--------:|:------------:|:-------:|
| **NLProxy** | ✅ Semantic | ✅ Full | ✅ NLI | ✅ |
| LangChain | ❌ | ❌ | ❌ | ⚠️ |
| LLMLingua | ✅ Token-level | ❌ | ❌ | ✅ |
| Lakera Guard | ❌ | ✅ Basic | ❌ | ❌ |
| Azure Content Safety | ❌ | ✅ | ❌ | ❌ |
**NLProxy is the only open-source solution combining all four capabilities in a single pipeline.**
"""
| # ============================================================================== | |
| # GRADIO | |
| # ============================================================================== | |
def resolve_entity(entity_str: str, placeholder_map: dict) -> str:
    """Translate a ``__PROT_`` placeholder back to its mapped value for display.

    Strings that do not start with ``__PROT_`` are returned untouched, and so
    are placeholders that have no entry in ``placeholder_map``.
    """
    is_placeholder = entity_str.startswith("__PROT_")
    return placeholder_map.get(entity_str, entity_str) if is_placeholder else entity_str
def parse_manual_rules(rules_text: Optional[str]) -> list:
    """Parse 'FORBID: AWS, Python; MANDATE: GCP, Rust' into Restriction objects.

    Clauses are separated by ``;`` or by a line break (the UI textbox is
    multi-line, so one rule per line is accepted too). Each clause has the
    form ``TYPE: entity[, entity...]``; only the ``FORBID`` and ``MANDATE``
    types (case-insensitive) are kept, and clauses without a colon or with an
    unknown type are ignored.

    Args:
        rules_text: Raw text from the rules textbox; ``None``, empty or
            whitespace-only input yields no rules.

    Returns:
        A list of ``Restriction`` objects with ``context="manual_ui"``, one
        per non-empty entity, in input order.
    """
    rules = []
    if not rules_text or not rules_text.strip():
        return rules
    for clause in re.split(r"[;\n]", rules_text):
        rtype, sep, entities = clause.partition(':')
        if not sep:
            # No colon: not a "TYPE: entities" clause.
            continue
        rtype = rtype.strip().upper()
        if rtype not in ("FORBID", "MANDATE"):
            continue
        for ent in entities.split(','):
            ent = ent.strip()
            if ent:
                rules.append(Restriction(type=rtype, entity=ent, context="manual_ui"))
    return rules
def execute_pipeline(
    raw_prompt: str,
    llm_response: str,
    privacy_mode: bool,
    mode: str,
    aggressiveness: float,
    use_nli: bool,
    manual_rules: str,
    auto_correct: bool = False,
    min_confidence: float = 0.6,
    max_regeneration_attempts: int = 3
):
    """Run the demo pipeline on one prompt/response pair and build the UI output.

    The steps are: firewall check, compression, shielding with the manual
    rules, safety validation, then correction and verification of the
    simulated LLM response.

    Args:
        raw_prompt: The user's prompt; must not be blank.
        llm_response: The simulated LLM answer to sanitize; must not be blank.
        privacy_mode: Forwarded to compression; when true, placeholders in the
            response are also re-injected before correction.
        mode: Domain mode forwarded to compression, shielding and safety.
        aggressiveness: Compression aggressiveness forwarded to the service.
        use_nli: Assigned to ``verifier.use_nli`` on the shared pipeline.
        manual_rules: Free-text rules parsed by ``parse_manual_rules``.
        auto_correct: When true, retry correction while the confidence score
            is below ``min_confidence``.
        min_confidence: Confidence threshold for the auto-correct loop.
        max_regeneration_attempts: Upper bound on auto-correct iterations.

    Returns:
        A 7-tuple matching the UI outputs: firewall markdown, final prompt
        text, safety markdown, TruthTable markdown, metrics markdown,
        sanitized response text, and verification markdown.

    Raises:
        gr.Error: If either text input is blank, or if any pipeline step
            fails (the original exception is chained as the cause).
    """
    if not raw_prompt.strip():
        raise gr.Error("Dirty User Prompt cannot be empty.")
    if not llm_response.strip():
        raise gr.Error("Simulated LLM Response cannot be empty.")
    try:
        pipeline = NLProxyPipeline.get_instance()
        # NOTE(review): this mutates the shared singleton, so concurrent
        # requests with different settings could affect each other - confirm.
        pipeline.verifier.use_nli = use_nli
        # =========================================================================
        # STEP 1: FIREWALL (Pre-flight)
        # =========================================================================
        action, violations = pipeline.firewall.check_prompt(raw_prompt)
        firewall_md = f"**🛡️ Action:** `{action.name}`\n\n"
        if violations:
            firewall_md += "**Violations:**\n" + "\n".join([f"- 🚨 {v['rule']} ({v['severity']})" for v in violations])
        else:
            firewall_md += "*✅ No malicious injections detected.*"
        # =========================================================================
        # STEP 2: COMPRESS (Semantic Compression)
        # =========================================================================
        # compress_batch internally shields, segments, compresses, and reconstructs.
        res = pipeline.service.compress_batch(
            [raw_prompt],
            mode=mode,
            aggressiveness=aggressiveness,
            privacy_mode=privacy_mode
        )[0]
        compressed_text = res.get("compressed_text", "")
        # =========================================================================
        # STEP 3: SHIELD WITH MANUAL RULES (Matches chat.py exactly)
        # =========================================================================
        # We shield AGAIN to capture manual rules for the Safety/Corrector steps.
        manual_rules_list = parse_manual_rules(manual_rules)
        shield_res = pipeline.service._shield_with_cache(
            text=raw_prompt,
            manual_restrictions=manual_rules_list,
            mode=mode
        )
        # Resolve placeholders in restrictions to prevent "__PROT_..." leaking into Corrector notes
        # NOTE(review): shield_res comes from a "_with_cache" helper; if the
        # object is shared through that cache, this in-place edit persists
        # across calls - confirm.
        for r in shield_res.restrictions:
            if r.entity.startswith("__PROT_") and r.entity in shield_res.placeholder_map:
                real_value = shield_res.placeholder_map[r.entity]
                object.__setattr__(r, 'entity', real_value)  # Bypass frozen dataclass
        # =========================================================================
        # STEP 4: SAFETY VALIDATION (Intent Preservation)
        # =========================================================================
        sentences = pipeline.service.segmenter.split_sentences(
            shield_res.shielded_text, language=None
        )
        safety_report = pipeline.service.safety.validate(
            original_text=raw_prompt,
            compressed_text=compressed_text,
            shield_result=shield_res,
            original_sentences=sentences,
            mode=mode,
            use_perplexity=False
        )
        # The safety checker might have re-injected missing critical sentences
        final_prompt_for_llm = safety_report.final_text
        safety_md = f"**🛡️ Safety Score:** `{safety_report.safety_score:.2f}`\n\n"
        if safety_report.forced_sentences_added > 0:
            safety_md += f"⚠️ **Re-injected {safety_report.forced_sentences_added} critical sentence(s)** that compression tried to remove.\n"
            safety_md += f"*Missing intents detected: {', '.join(safety_report.missing_intents[:3])}*\n"
        else:
            safety_md += "✅ *All critical intents preserved during compression.*"
        # =========================================================================
        # TRUTHTABLE VISUALIZATION
        # =========================================================================
        tt_md = "**🔒 Shielded Entities (PII/Secrets):**\n"
        # Group entity values by upper-cased type, keeping first-seen order.
        entity_groups = {}
        for ent in shield_res.entities:
            entity_groups.setdefault(ent.entity_type.upper(), []).append(ent.value)
        for etype, values in entity_groups.items():
            # Show at most three values per type; the ellipsis marks truncation.
            tt_md += f"- **{etype}**: `{', '.join(values[:3])}` {'...' if len(values) > 3 else ''}\n"
        if not entity_groups:
            tt_md += "- *None detected*\n"
        tt_md += "\n**📜 Semantic Restrictions (TruthTable):**\n"
        if shield_res.restrictions:
            for r in shield_res.restrictions:
                tt_md += f"- **{r.type}**: `{r.entity}`\n"
        else:
            tt_md += "- *None detected*\n"
        # =========================================================================
        # METRICS
        # =========================================================================
        tokens_saved = res.get('tokens_saved', 0)
        ratio = res.get('compression_ratio', 0)
        raw_cost = res.get('cost_saved_usd', 0)
        # NOTE(review): the /1000 rescaling is unexplained here - confirm the
        # unit the service reports for 'cost_saved_usd'.
        real_cost = raw_cost / 1000.0
        if tokens_saved < 0:
            metrics_md = (
                f"### 📊 Compression & Security Metrics\n"
                f"- **🔒 Security Overhead:** `{abs(tokens_saved)} tokens` *(Placeholders + Rules)*\n"
                f"- **💰 Net Cost Impact:** `+${abs(real_cost):.6f}`\n"
                f"- **🛡️ Safety Score:** `{safety_report.safety_score:.2f}`\n"
                f"\n> ℹ️ *Negative compression = Security features added more tokens than were saved.*"
            )
        else:
            metrics_md = (
                f"### 📊 Compression & Security Metrics\n"
                f"- **✅ Tokens Saved:** `{tokens_saved}`\n"
                f"- **💰 Cost Saved:** `${real_cost:.6f}`\n"
                f"- **📉 Compression Ratio:** `{ratio:.2%}`\n"
                f"- **🛡️ Safety Score:** `{safety_report.safety_score:.2f}`"
            )
        # =========================================================================
        # STEP 5: CORRECT & VERIFY (Post-flight)
        # =========================================================================
        response_text = llm_response
        if privacy_mode:
            response_text = pipeline.service.reconstructor._reinject_entities(
                response_text, shield_res.placeholder_map
            )
        corrected = pipeline.corrector.correct(response_text, shield_res)
        verification = pipeline.verifier.verify(corrected, shield_res)
        correction_attempts = 0
        final_response_text = corrected
        while (
            auto_correct
            and verification.confidence_score < min_confidence
            and correction_attempts < max_regeneration_attempts
        ):
            correction_attempts += 1
            # Actually attempt another correction pass; the previous loop only
            # re-verified the very same text, which could never change the score.
            recorrected = pipeline.corrector.correct(final_response_text, shield_res)
            if recorrected == final_response_text:
                # The corrector has nothing more to change, so further
                # attempts would verify identical text; stop here.
                break
            final_response_text = recorrected
            verification = pipeline.verifier.verify(final_response_text, shield_res)
        # Build verification markdown
        verif_md = f"**🎯 Confidence Score:** `{verification.confidence_score:.2f}`\n\n"
        if correction_attempts > 0:
            verif_md += f"**🔄 Auto-correction attempts:** {correction_attempts}\n\n"
        if verification.violations:
            verif_md += "**🚨 Violations Detected:**\n"
            for v in verification.violations[:5]:  # Limit to 5 for UI cleanliness
                verif_md += f"- {v}\n"
        else:
            verif_md += "*✅ No semantic drift or policy violations detected.*"
        # Return 7 items to match the new UI layout
        return firewall_md, final_prompt_for_llm, safety_md, tt_md, metrics_md, final_response_text, verif_md
    except Exception as e:
        logger.exception("Critical failure in pipeline execution.")
        # Chain the cause so the original traceback is kept alongside the UI error.
        raise gr.Error(f"Pipeline execution failed: {str(e)}") from e
| # ============================================================================== | |
| # GRADIO UI | |
| # ============================================================================== | |
# Declarative Gradio layout: a tutorial tab built from the TUTORIAL_* constants
# and an interactive tab whose button calls execute_pipeline.
# NOTE(review): nesting below is reconstructed from the `with` structure of a
# whitespace-stripped copy - confirm the placement of run_btn in particular.
with gr.Blocks(title="NLProxy Demo", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🛡️ NLProxy: Enterprise Prompt Security & Compression Gateway")
    gr.Markdown("*The offline-first middleware that cuts your LLM bill by up to 60% while enforcing zero-trust security.*")
    with gr.Tabs():
        # ======================================================================
        # TAB 1: TUTORIAL & DOCUMENTATION
        # ======================================================================
        with gr.Tab("📖 Tutorial & Architecture"):
            # Only the first accordion starts expanded.
            with gr.Accordion("🎯 What is NLProxy?", open=True):
                gr.Markdown(TUTORIAL_INTRO)
            with gr.Accordion("🏗️ The 6-Stage Pipeline & TruthTable", open=False):
                gr.Markdown(TUTORIAL_PIPELINE)
            with gr.Accordion("💼 Real-World Use Cases", open=False):
                gr.Markdown(TUTORIAL_USE_CASES)
            with gr.Accordion("🎮 How to Use This Demo", open=False):
                gr.Markdown(TUTORIAL_HOW_TO_USE)
            with gr.Accordion("📊 Performance Benchmarks", open=False):
                gr.Markdown(TUTORIAL_BENCHMARKS)
            gr.Markdown("---")
            gr.Markdown(
                "### 🔗 Resources\n"
                "- 📦 **GitHub Repository**: [github.com/intellideep/nlproxy](https://github.com/intellideep/nlproxy)\n"
                "- 📚 **Documentation**: See `docs/` folder in the repo\n"
                "- 💬 **Support**: [Telegram @itsLerb](https://t.me/itsLerb) | intellideeplabs@gmail.com\n"
                "- 📄 **License**: BSL 1.1 (Free for indie devs, students, non-profits)"
            )
        # ======================================================================
        # TAB 2: INTERACTIVE DEMO
        # ======================================================================
        with gr.Tab("🚀 Interactive Demo"):
            gr.Markdown("### 🎛️ Run the Full 5-Step Pipeline")
            gr.Markdown("*Provide a dirty prompt + simulated LLM response, and watch NLProxy protect, compress, and verify in real-time.*")
            # Top row: manual rules plus the auto-correct controls.
            with gr.Row():
                manual_rules = gr.Textbox(
                    label="Manual Business Rules (Optional)",
                    placeholder="FORBID: AWS, Python; MANDATE: GCP, Rust",
                    value="",
                    lines=2,
                    scale=2,
                    info="Define explicit restrictions that regex might miss"
                )
                auto_correct_checkbox = gr.Checkbox(
                    label="Auto-Correct Low Confidence",
                    value=False,
                    scale=1,
                    info="Regenerate response if confidence < threshold"
                )
                min_confidence_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.6,
                    step=0.05,
                    label="Min Confidence Threshold",
                    scale=1
                )
            with gr.Row():
                # Left column: inputs and pipeline configuration.
                with gr.Column(scale=1):
                    gr.Markdown("#### 📥 Step 0: Input & Configuration")
                    raw_prompt = gr.Textbox(
                        label="Dirty User Prompt (PII + Business Rules)",
                        value="""Hi, I'm Sarah Chen (sarah.chen@acmecorp.com, +1-555-0198). We need to migrate our legacy payment processing system currently running on server 10.20.30.40. The system handles ~50k transactions/day with a budget of $150,000 USD for Q3.
CRITICAL REQUIREMENTS:
- Do NOT use AWS services or Python, we are exclusively on GCP with Rust for compliance and memory safety.
- The new microservice MUST be written in Rust.
- Never expose internal IPs or database credentials in responses.
- Primary API: https://internal.acmecorp.com/api/v2/payments
Please design the architecture for the new event-driven payment processor.""",
                        lines=12
                    )
                    llm_response = gr.Textbox(
                        label="Simulated LLM Response (Coherent but Hallucinated)",
                        value="""Here's the architecture design for your event-driven payment processor:
1. **Compute Layer**: I recommend using AWS Lambda with Python for serverless scalability. Python's boto3 library integrates perfectly with AWS services.
2. **Message Queue**: Use Amazon SQS to handle the 50k daily transactions with dead-letter queues for failed payments.
3. **Database**: Store transaction records in DynamoDB. You can connect to your legacy server at 10.20.30.40 and also add a read replica at 192.168.1.100 for better performance.
4. **Monitoring**: Set up CloudWatch alerts for transaction failures and latency spikes above 200ms.
5. **Cost Analysis**: The total estimated cost is $45,000/month using AWS, well within your $150,000 Q3 budget.
This Python-based serverless architecture will give you excellent developer experience and automatic scaling.""",
                        lines=14
                    )
                    gr.Markdown("#### ⚙️ Pipeline Configuration")
                    with gr.Row():
                        mode_dropdown = gr.Dropdown(
                            choices=["general", "code", "finance", "legal"],
                            value="general",
                            label="Domain Mode"
                        )
                        aggressiveness_slider = gr.Slider(
                            minimum=0.0,
                            maximum=1.0,
                            value=0.45,
                            step=0.05,
                            label="Compression Aggressiveness"
                        )
                    with gr.Row():
                        privacy_checkbox = gr.Checkbox(
                            label="Privacy Mode (Strict PII Anonymization)",
                            value=False,
                            info="Turn OFF to allow RestrictionGraph to read Business Rules (FORBID/MANDATE) that NER might confuse with PII."
                        )
                        nli_checkbox = gr.Checkbox(
                            label="Use NLI Verification",
                            value=True
                        )
                # Right column: one output widget per pipeline step.
                with gr.Column(scale=1):
                    gr.Markdown("#### 🛡️ Step 1: Firewall (Pre-flight)")
                    firewall_out = gr.Markdown()
                    gr.Markdown("#### 🗜️ Step 2: Compress Prompt")
                    compress_out = gr.Textbox(label="Compressed Prompt (Sent to LLM)", interactive=False, lines=6)
                    truthtable_out = gr.Markdown()
                    gr.Markdown("#### 🧠 Step 3: Safety Validation (Intent Preservation)")
                    safety_out = gr.Markdown()
                    gr.Markdown("#### 🧹 Step 4: Response Corrector")
                    corrector_out = gr.Textbox(label="Sanitized LLM Output (Post-Flight)", interactive=False, lines=6)
                    gr.Markdown("#### 🔍 Step 5: Post-LLM Verifier (NLI)")
                    verifier_out = gr.Markdown()
                    gr.Markdown("#### 📊 Metrics")
                    metrics_out = gr.Markdown()
            run_btn = gr.Button("🚀 Run Full Pipeline", variant="primary")
            # `inputs` follows execute_pipeline's parameter order (the last
            # parameter, max_regeneration_attempts, is left at its default);
            # `outputs` follows the order of its 7-item return tuple.
            run_btn.click(
                fn=execute_pipeline,
                inputs=[
                    raw_prompt,
                    llm_response,
                    privacy_checkbox,
                    mode_dropdown,
                    aggressiveness_slider,
                    nli_checkbox,
                    manual_rules,
                    auto_correct_checkbox,
                    min_confidence_slider
                ],
                outputs=[
                    firewall_out,
                    compress_out,
                    safety_out,
                    truthtable_out,
                    metrics_out,
                    corrector_out,
                    verifier_out
                ]
            )
# Script entry point: enable the request queue (capped at 20 entries) and
# serve on all network interfaces at port 7860.
if __name__ == "__main__":
    demo.queue(max_size=20).launch(server_name="0.0.0.0", server_port=7860)