Spaces:
Running
Running
| """NLProxy Enterprise SDK | |
| Author: IntelliDeep Labs Team | |
| License: BSL 1.1 | |
| """ | |
| __version__ = "1.2.1" | |
| import os | |
# CUDA guard -- must run before any heavy ML library is imported below.
# On CPU-only machines PyTorch/ONNX can crash on missing NCCL/CUDA shared
# objects, so CUDA is hidden unless the caller already chose a value for
# CUDA_VISIBLE_DEVICES or an NVIDIA kernel driver is detected.
# NOTE(review): the probe path is a Linux procfs entry; on other platforms it
# never exists, so CUDA is hidden there unless the variable is preset.
if "CUDA_VISIBLE_DEVICES" not in os.environ and not os.path.exists(
    "/proc/driver/nvidia/version"
):
    os.environ["CUDA_VISIBLE_DEVICES"] = ""
| import logging | |
| from pathlib import Path | |
| from nlproxy.core.model_manager import ModelManager | |
| from nlproxy.core.shield import PromptShield, DomainMode | |
| from nlproxy.core.segmenter import SemanticSegmenter | |
| from nlproxy.core.compressor import SemanticCompressor | |
| from nlproxy.core.reconstructor import PromptReconstructor | |
| from nlproxy.core.safety import SafetyChecker | |
| from nlproxy.core.verifier import PostLLMVerifier | |
| from nlproxy.core.corrector import ResponseCorrector | |
| from nlproxy.cache.semantic_cache import SemanticLLMCache | |
| from nlproxy.firewall.firewall import PromptFirewall | |
| from nlproxy.service.compression import CompressionService | |
| logger = logging.getLogger(__name__) | |
def setup_models(models_dir: str | Path | None = None) -> None:
    """Make sure the NLProxy models are ready; call once at application startup.

    Intended for a single invocation when the application boots (CLI, FastAPI,
    or scripts). The work is delegated to the manager returned by
    ``ModelManager.get_instance`` and its ``sync_ensure_ready`` method.

    Args:
        models_dir: Directory passed (as a string) to
            ``ModelManager.get_instance``. When omitted, the
            ``NLPROXY_MODELS_DIR`` environment variable is used, falling back
            to ``"models"``.

    Raises:
        Exception: Any error raised while preparing the models is logged and
            then re-raised unchanged.
    """
    if models_dir is None:
        # Without this, str(None) would hand the manager a directory that is
        # literally named "None".
        models_dir = os.getenv("NLPROXY_MODELS_DIR", "models")
    try:
        manager = ModelManager.get_instance(str(models_dir))
        manager.sync_ensure_ready()
        logger.info("🌍 NLProxy models verified globally.")
    except Exception as e:
        logger.error(f"❌ Model initialization failed: {e}")
        raise
| # ============================================================================= | |
| # COMPILED SDK COMPATIBILITY LAYER | |
| # ============================================================================= | |
| _global_service = None | |
class CompressRequest:
    """Input for ``compress_prompt``: the text plus its compression settings."""

    def __init__(self, text: str, mode: str, aggressiveness: float):
        # Plain value holder; nothing is validated or normalised here.
        self.text, self.mode, self.aggressiveness = text, mode, aggressiveness
class CompressResponse:
    """Result of a compression call.

    Holds the processed text, the original and compressed lengths, the
    placeholder mapping and the list of violations exactly as they were
    passed to the constructor.
    """

    def __init__(
        self,
        processed_text: str,
        original_len: float,
        compressed_len: float,
        placeholders: dict,
        violations: list,
    ):
        # Text produced by the pipeline.
        self.processed_text = processed_text
        # Size figures, stored as given.
        self.original_len, self.compressed_len = original_len, compressed_len
        # Shielding metadata, stored by reference (no copies are made).
        self.placeholders, self.violations = placeholders, violations
| class CompressUnifiedRequest: | |
| def __init__( | |
| self, | |
| prompt: str, | |
| domain: str, | |
| aggressiveness: float, | |
| provider: str = "", | |
| model: str = "", | |
| max_tokens: int | None = None, | |
| temperature: float | None = None, | |
| bypass_cache: bool = False, | |
| check_firewall: bool = True, | |
| semantic_drift_threshold: float | None = None, | |
| ): | |
| self.prompt = prompt | |
| self.domain = domain | |
| self.aggressiveness = aggressiveness | |
| self.provider = provider | |
| self.model = model | |
| self.max_tokens = max_tokens | |
| self.temperature = temperature | |
| self.bypass_cache = bypass_cache | |
| self.check_firewall = check_firewall | |
| self.semantic_drift_threshold = semantic_drift_threshold | |
class CompressUnifiedResponse:
    """Outcome of ``run_unified_pipeline``.

    A plain value holder: each constructor argument is stored unchanged under
    an attribute of the same name.
    """

    def __init__(
        self,
        allowed: bool,
        cache_hit: bool,
        processed_prompt: str,
        raw_response: str,
        final_response: str,
        confidence_score: float,
        violations: list,
        matched_rules: list,
        latency_ms: float,
    ):
        # Gatekeeping flags.
        self.allowed, self.cache_hit = allowed, cache_hit
        # Prompt as sent on, followed by the response before/after post-processing.
        self.processed_prompt = processed_prompt
        self.raw_response, self.final_response = raw_response, final_response
        # Verification details.
        self.confidence_score = confidence_score
        self.violations, self.matched_rules = violations, matched_rules
        # Timing figure supplied by the caller.
        self.latency_ms = latency_ms
def init_engine(
    models_dir_or_path: str = "models",
    config_path: str | None = None,
    tokenizer_path: str | None = None
) -> bool:
    """Create the module-wide ``CompressionService`` from local models.

    Two calling conventions are accepted:

    1. ``init_engine("path/to/models_dir")`` -- the argument is used directly
       as the models directory.
    2. ``init_engine("model_path", "config_path", "tokenizer_path")`` -- the
       legacy form. When both extra paths are given, the models directory is
       taken to be the grandparent of ``models_dir_or_path``; the values of
       ``config_path`` and ``tokenizer_path`` are not used for anything else.

    Returns:
        ``True`` when the service was created and stored, ``False`` when any
        exception occurred (the error is logged, not raised).
    """
    global _global_service
    try:
        root = Path(models_dir_or_path)
        legacy_call = not (config_path is None or tokenizer_path is None)
        if legacy_call:
            # A model file path was supplied: climb two levels to the base dir.
            root = root.parent.parent
        _global_service = CompressionService(
            use_cache=True,
            models_dir=root,
            privacy_mode=False,
        )
    except Exception as e:
        logger.error(f"Failed to initialize engine: {e}")
        return False
    return True
def ensure_models_ready(models_dir: str) -> None:
    """Ensure the models under *models_dir* are ready for use.

    Thin compatibility alias: the work is done entirely by ``setup_models``,
    including logging and re-raising any failure.
    """
    setup_models(models_dir=models_dir)
def compress_prompt(request: CompressRequest) -> CompressResponse:
    """Shield and compress a single prompt.

    If the engine has not been initialised yet, it is initialised on demand
    from ``NLPROXY_MODELS_DIR`` (default ``"models"``).

    Args:
        request: Text, mode and aggressiveness to use.

    Returns:
        A ``CompressResponse`` whose lengths are character counts (as floats),
        whose placeholders come from the shield result for the same text and
        mode, and whose ``violations`` list is always empty.

    Raises:
        RuntimeError: If the engine cannot be initialised, or if the
            compression batch comes back empty.
    """
    if _global_service is None:
        # Lazy start-up so callers may skip an explicit init_engine() call.
        init_engine(os.getenv("NLPROXY_MODELS_DIR", "models"))
        if _global_service is None:
            raise RuntimeError("Embedding engine not initialized. Call init_engine() first.")

    source_text = request.text

    # The service only exposes a batch API, so submit a one-element batch.
    batch = _global_service.compress_batch(
        texts=[source_text],
        aggressiveness=request.aggressiveness,
        mode=request.mode,
    )
    if not batch:
        raise RuntimeError("Compression failed")
    first = batch[0]

    # Ask the service for the shield result of the same text/mode in order to
    # obtain the placeholder map.
    shielded = _global_service._shield_with_cache(
        text=source_text,
        mode=request.mode,
    )

    compressed = first["compressed_text"]
    return CompressResponse(
        processed_text=compressed,
        original_len=float(len(source_text)),
        compressed_len=float(len(compressed)),
        placeholders=shielded.placeholder_map,
        violations=[],
    )
def _run_coroutine_blocking(coro):
    """Drive *coro* to completion from synchronous code and return its result."""
    import asyncio

    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # No usable loop in this thread: create one and keep it registered so
        # later calls reuse it.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    if not loop.is_running():
        return loop.run_until_complete(coro)

    # A loop is already running in this thread (notebook, async web handler).
    try:
        import nest_asyncio
    except ImportError:
        # Re-entering a running loop is impossible without nest_asyncio, so run
        # the coroutine on a private loop in a worker thread instead of failing
        # with "This event loop is already running".
        from concurrent.futures import ThreadPoolExecutor

        with ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(asyncio.run, coro).result()

    nest_asyncio.apply()
    return loop.run_until_complete(coro)


def run_unified_pipeline(request: CompressUnifiedRequest) -> CompressUnifiedResponse:
    """Run the full pipeline: firewall, compression, LLM call, post-processing.

    Steps:
        1. Firewall check of the raw prompt (skipped when
           ``request.check_firewall`` is false). A BLOCK verdict returns
           immediately with ``allowed=False`` and the firewall violations.
        2. Shielding and compression via ``compress_prompt`` (which also
           initialises the engine on demand).
        3. Synchronous call of the provider client's ``generate`` coroutine.
        4. Re-injection of shielded entities into the LLM output.
        5. Response correction followed by verification.

    Args:
        request: Prompt and settings. ``max_tokens``, ``temperature``,
            ``bypass_cache`` and ``semantic_drift_threshold`` are currently
            not consulted by this function.

    Returns:
        A ``CompressUnifiedResponse``. ``cache_hit`` is always ``False`` and
        ``latency_ms`` covers the whole call.

    Raises:
        RuntimeError: Propagated from ``compress_prompt`` when the engine
            cannot be initialised or compression yields nothing.
        Exception: Errors from the provider lookup, the LLM client or the
            post-processing components propagate unchanged.
    """
    import time
    from nlproxy.firewall.firewall import PromptFirewall, FirewallAction
    from nlproxy.llm.client import LLMProvider, LLMClientFactory
    from nlproxy.core.verifier import PostLLMVerifier
    from nlproxy.core.corrector import ResponseCorrector

    # Monotonic clock: latency must not be affected by wall-clock adjustments.
    started = time.perf_counter()

    def elapsed_ms() -> float:
        return (time.perf_counter() - started) * 1000

    # 1. Firewall check (honours the caller's opt-out flag).
    if request.check_firewall:
        action, violations = PromptFirewall().check_prompt(request.prompt)
        if action == FirewallAction.BLOCK:
            return CompressUnifiedResponse(
                allowed=False,
                cache_hit=False,
                processed_prompt=request.prompt,
                raw_response="",
                final_response="",
                confidence_score=0.0,
                violations=violations,
                matched_rules=violations,
                latency_ms=elapsed_ms(),
            )

    # 2. Compress prompt. compress_prompt() initialises the engine if needed
    #    and raises if that fails, so _global_service is set afterwards.
    comp_res = compress_prompt(
        CompressRequest(request.prompt, request.domain, request.aggressiveness)
    )

    # 3. Call LLM.
    provider = LLMProvider(request.provider)
    client = LLMClientFactory.get_or_create(provider, model=request.model)
    generated = _run_coroutine_blocking(client.generate(prompt=comp_res.processed_text))
    raw_response = generated.text if hasattr(generated, "text") else str(generated)

    # 4. Re-inject PII that was replaced by placeholders before the LLM call.
    response_text = _global_service.reconstructor._reinject_entities(
        raw_response, comp_res.placeholders
    )

    # 5. Corrector & verifier share one shield result for the original prompt
    #    instead of shielding the same text twice.
    shield_result = _global_service.shield.shield(request.prompt)
    final_response = ResponseCorrector(mode=request.domain).correct(
        response_text, shield_result
    )
    verification = PostLLMVerifier(mode=request.domain).verify(
        final_response, shield_result
    )

    return CompressUnifiedResponse(
        allowed=True,
        cache_hit=False,
        processed_prompt=comp_res.processed_text,
        raw_response=raw_response,
        final_response=final_response,
        confidence_score=verification.confidence_score,
        violations=verification.violations,
        matched_rules=[],
        latency_ms=elapsed_ms(),
    )
# Names exported by ``from <package> import *``.
__all__ = [
    # Core components
    "PromptShield",
    "DomainMode",
    "SemanticSegmenter",
    "SemanticCompressor",
    "PromptReconstructor",
    "SafetyChecker",
    "PostLLMVerifier",
    "ResponseCorrector",
    "SemanticLLMCache",
    "PromptFirewall",
    "CompressionService",
    # Startup hook (documented as the once-per-application entry point)
    "setup_models",
    # Compatibility exports
    "CompressRequest",
    "CompressResponse",
    "CompressUnifiedRequest",
    "CompressUnifiedResponse",
    "init_engine",
    "ensure_models_ready",
    "compress_prompt",
    "run_unified_pipeline",
]