"""
NLProxy Enterprise SDK
Author: IntelliDeep Labs Team
License: BSL 1.1
"""

__version__ = "1.2.1"

import os

# Safe CUDA check; this must run BEFORE the heavy libraries below are imported.
# It prevents PyTorch/ONNX from crashing with missing NCCL/CUDA shared-object
# errors on CPU-only machines: when the user has not set CUDA_VISIBLE_DEVICES
# and no NVIDIA driver is visible, hide all GPUs explicitly.
if os.getenv("CUDA_VISIBLE_DEVICES") is None:
    if not os.path.exists("/proc/driver/nvidia/version"):
        os.environ["CUDA_VISIBLE_DEVICES"] = ""

import logging
from dataclasses import dataclass
from pathlib import Path

from nlproxy.core.model_manager import ModelManager
from nlproxy.core.shield import PromptShield, DomainMode
from nlproxy.core.segmenter import SemanticSegmenter
from nlproxy.core.compressor import SemanticCompressor
from nlproxy.core.reconstructor import PromptReconstructor
from nlproxy.core.safety import SafetyChecker
from nlproxy.core.verifier import PostLLMVerifier
from nlproxy.core.corrector import ResponseCorrector
from nlproxy.cache.semantic_cache import SemanticLLMCache
from nlproxy.firewall.firewall import PromptFirewall
from nlproxy.service.compression import CompressionService

logger = logging.getLogger(__name__)


def _default_models_dir() -> str:
    """Return the models directory from ``NLPROXY_MODELS_DIR`` (default ``"models"``)."""
    return os.getenv("NLPROXY_MODELS_DIR", "models")


def setup_models(models_dir: str | Path | None = None) -> None:
    """Call this once at application startup (CLI, FastAPI, or scripts).

    Args:
        models_dir: Directory holding the models. When ``None``, the
            ``NLPROXY_MODELS_DIR`` environment variable is used, falling back
            to ``"models"``.

    Raises:
        Exception: Whatever ``ModelManager`` raises is logged and re-raised.
    """
    # ``str(None)`` would hand the literal directory name "None" to the
    # manager, so resolve the default explicitly first.
    resolved_dir = _default_models_dir() if models_dir is None else str(models_dir)
    try:
        manager = ModelManager.get_instance(resolved_dir)
        manager.sync_ensure_ready()
        logger.info("🌍 NLProxy models verified globally.")
    except Exception as e:
        logger.error("❌ Model initialization failed: %s", e)
        raise


# =============================================================================
# COMPILED SDK COMPATIBILITY LAYER
# =============================================================================

# Process-wide compression service; created by init_engine() (or lazily by
# compress_prompt()).
_global_service: CompressionService | None = None


# NOTE: eq=False keeps the identity-based equality and hashing that the
# original hand-written classes had; the dataclass only supplies __init__
# (same field order and defaults) and a readable __repr__.
@dataclass(eq=False)
class CompressRequest:
    """Input for :func:`compress_prompt`."""

    text: str  # prompt text to shield and compress
    mode: str  # forwarded as ``mode`` to the compression service
    aggressiveness: float  # forwarded as ``aggressiveness`` to the service


@dataclass(eq=False)
class CompressResponse:
    """Result of :func:`compress_prompt`."""

    processed_text: str  # compressed prompt text
    original_len: float  # character count of the input text
    compressed_len: float  # character count of ``processed_text``
    placeholders: dict  # placeholder map taken from the shield result
    violations: list  # always empty when produced by compress_prompt()


@dataclass(eq=False)
class CompressUnifiedRequest:
    """Input for :func:`run_unified_pipeline`.

    ``max_tokens``, ``temperature``, ``bypass_cache`` and
    ``semantic_drift_threshold`` are accepted for interface compatibility but
    are not read by the pipeline implementation in this module.
    """

    prompt: str
    domain: str  # used as the compression mode and corrector/verifier mode
    aggressiveness: float
    provider: str = ""
    model: str = ""
    max_tokens: int | None = None
    temperature: float | None = None
    bypass_cache: bool = False
    check_firewall: bool = True  # when False the firewall stage is skipped
    semantic_drift_threshold: float | None = None


@dataclass(eq=False)
class CompressUnifiedResponse:
    """Result of :func:`run_unified_pipeline`."""

    allowed: bool  # False when the firewall blocked the prompt
    cache_hit: bool
    processed_prompt: str
    raw_response: str  # LLM output before placeholder re-injection
    final_response: str  # output after re-injection and correction
    confidence_score: float
    violations: list
    matched_rules: list
    latency_ms: float


def init_engine(
    models_dir_or_path: str = "models",
    config_path: str | None = None,
    tokenizer_path: str | None = None,
) -> bool:
    """Initialize the embedding engine with local models.

    Can be called in two ways:
      1. init_engine("path/to/models_dir")
         -> Resolves default files under all-MiniLM-L6-v2/
      2. init_engine("model_path", "config_path", "tokenizer_path")
         -> Custom paths (backward compatibility)

    Returns:
        ``True`` when the global service was created, ``False`` when creation
        failed (the error is logged, not raised).
    """
    global _global_service
    try:
        if config_path is not None and tokenizer_path is not None:
            # Case 2: custom paths; the models directory is two levels above
            # the model file.
            models_dir = Path(models_dir_or_path).parent.parent
        else:
            # Case 1: base models directory.
            models_dir = Path(models_dir_or_path)

        _global_service = CompressionService(
            use_cache=True,
            models_dir=models_dir,
            privacy_mode=False,
        )
        return True
    except Exception as e:
        logger.error("Failed to initialize engine: %s", e)
        return False


def ensure_models_ready(models_dir: str) -> None:
    """Download models if they are not already present."""
    setup_models(models_dir)


def compress_prompt(request: CompressRequest) -> CompressResponse:
    """Run shielding and prompt compression.

    Lazily initializes the global engine from ``NLPROXY_MODELS_DIR`` (default
    ``"models"``) when :func:`init_engine` has not been called yet.

    Raises:
        RuntimeError: If the engine cannot be initialized or the compression
            service returns no result.
    """
    if _global_service is None:
        # Auto-initialize with default models if not done.
        init_engine(_default_models_dir())

    service = _global_service
    if service is None:
        raise RuntimeError("Embedding engine not initialized. Call init_engine() first.")

    # Run the compression service batch pipeline on a single-item batch.
    results = service.compress_batch(
        texts=[request.text],
        aggressiveness=request.aggressiveness,
        mode=request.mode,
    )
    if not results:
        raise RuntimeError("Compression failed")
    compressed_text = results[0]["compressed_text"]

    # Retrieve the shield result from cache to get the placeholders map.
    shield_result = service._shield_with_cache(text=request.text, mode=request.mode)

    return CompressResponse(
        processed_text=compressed_text,
        original_len=float(len(request.text)),
        compressed_len=float(len(compressed_text)),
        placeholders=shield_result.placeholder_map,
        violations=[],
    )


def _run_coroutine_blocking(coro):
    """Drive *coro* to completion from synchronous code and return its result.

    Reuses the thread's current event loop when there is one so that objects
    bound to it keep working across calls; a missing or closed loop is
    replaced by a fresh one. If the loop is already running (for example when
    called from inside async code), ``nest_asyncio`` is required to allow the
    re-entrant ``run_until_complete`` call.

    Raises:
        RuntimeError: If the loop is already running and ``nest_asyncio`` is
            not installed.
    """
    import asyncio

    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = None
    if loop is None or loop.is_closed():
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    if loop.is_running():
        try:
            import nest_asyncio
        except ImportError as exc:
            # Without nest_asyncio, run_until_complete() on a running loop can
            # only fail; close the coroutine so it is not leaked un-awaited and
            # report the actual cause.
            close = getattr(coro, "close", None)
            if callable(close):
                close()
            raise RuntimeError(
                "run_unified_pipeline() was called while an event loop is "
                "already running; install 'nest_asyncio' or call it from "
                "synchronous code."
            ) from exc
        nest_asyncio.apply()

    return loop.run_until_complete(coro)


def run_unified_pipeline(request: CompressUnifiedRequest) -> CompressUnifiedResponse:
    """Execute unified orchestrated pipeline.

    Stages: firewall check (skipped when ``request.check_firewall`` is false),
    prompt compression, LLM call, placeholder re-injection, response
    correction and verification.

    Returns:
        A response with ``allowed=False`` and empty response texts when the
        firewall blocks the prompt; otherwise the corrected LLM response with
        the verifier's confidence score and violations.

    Raises:
        RuntimeError: If the engine cannot be initialized, compression fails,
            or the call is made from a running event loop without
            ``nest_asyncio`` installed.
    """
    import time

    # Imported lazily, as in the original implementation.
    from nlproxy.firewall.firewall import FirewallAction
    from nlproxy.llm.client import LLMProvider, LLMClientFactory

    # perf_counter() is monotonic, so the latency cannot be skewed by
    # wall-clock adjustments.
    started = time.perf_counter()

    def elapsed_ms() -> float:
        return (time.perf_counter() - started) * 1000

    # 1. Firewall check
    if request.check_firewall:
        firewall = PromptFirewall()
        action, violations = firewall.check_prompt(request.prompt)
        if action == FirewallAction.BLOCK:
            return CompressUnifiedResponse(
                allowed=False,
                cache_hit=False,
                processed_prompt=request.prompt,
                raw_response="",
                final_response="",
                confidence_score=0.0,
                violations=violations,
                matched_rules=violations,
                latency_ms=elapsed_ms(),
            )

    # 2. Compress prompt (compress_prompt() initializes the engine on demand
    # and raises if that fails, so the service is available afterwards).
    comp_req = CompressRequest(request.prompt, request.domain, request.aggressiveness)
    comp_res = compress_prompt(comp_req)
    service = _global_service

    # 3. Call LLM
    provider = LLMProvider(request.provider)
    client = LLMClientFactory.get_or_create(provider, model=request.model)
    generated = _run_coroutine_blocking(client.generate(prompt=comp_res.processed_text))
    raw_response = generated.text if hasattr(generated, "text") else str(generated)

    # Re-inject PII
    response_text = service.reconstructor._reinject_entities(
        raw_response, comp_res.placeholders
    )

    # Corrector & Verifier: shield the original prompt once and share the
    # result between both stages instead of recomputing it.
    shielded_prompt = service.shield.shield(request.prompt)
    corrector = ResponseCorrector(mode=request.domain)
    final_response = corrector.correct(response_text, shielded_prompt)
    verifier = PostLLMVerifier(mode=request.domain)
    verification = verifier.verify(final_response, shielded_prompt)

    return CompressUnifiedResponse(
        allowed=True,
        cache_hit=False,
        processed_prompt=comp_res.processed_text,
        raw_response=raw_response,
        final_response=final_response,
        confidence_score=verification.confidence_score,
        violations=verification.violations,
        matched_rules=[],
        latency_ms=elapsed_ms(),
    )


__all__ = [
    "PromptShield",
    "DomainMode",
    "SemanticSegmenter",
    "SemanticCompressor",
    "PromptReconstructor",
    "SafetyChecker",
    "PostLLMVerifier",
    "ResponseCorrector",
    "SemanticLLMCache",
    "PromptFirewall",
    "CompressionService",
    # Compatibility exports
    "CompressRequest",
    "CompressResponse",
    "CompressUnifiedRequest",
    "CompressUnifiedResponse",
    "init_engine",
    "ensure_models_ready",
    "compress_prompt",
    "run_unified_pipeline",
]