# NOTE: the following module begins with its docstring; stray extraction
# artifacts ("Spaces:" / "Configuration error") removed.
| """ | |
| CASCADE Model Identity Layer | |
| Canonical identification for any AI model variant: | |
| - Base models (meta-llama/Llama-3-8B) | |
| - Quantizations (Q4_K_M, Q8_0, AWQ, GPTQ) | |
| - Fine-tunes (LoRA, full, RLHF) | |
| - API endpoints (behavioral fingerprinting) | |
| Every unique model gets a node in the lattice. | |
| Every observation links to its model's node. | |
| The lattice becomes the collective memory of AI behavior. | |
| "Same name, different model, different behavior." | |
| """ | |
| import hashlib | |
| import json | |
| import time | |
| from pathlib import Path | |
| from dataclasses import dataclass, field, asdict | |
| from typing import Optional, List, Dict, Any | |
| from enum import Enum | |
| class ModelFormat(Enum): | |
| """Model weight formats.""" | |
| SAFETENSORS = "safetensors" | |
| PYTORCH = "pytorch" | |
| GGUF = "gguf" | |
| GGML = "ggml" | |
| ONNX = "onnx" | |
| TENSORRT = "tensorrt" | |
| OPENVINO = "openvino" | |
| COREML = "coreml" | |
| API = "api" # No weights, just endpoint | |
| UNKNOWN = "unknown" | |
| class QuantizationType(Enum): | |
| """Quantization methods.""" | |
| NONE = "none" # FP32/FP16/BF16 | |
| GGUF_Q4_0 = "Q4_0" | |
| GGUF_Q4_K_M = "Q4_K_M" | |
| GGUF_Q4_K_S = "Q4_K_S" | |
| GGUF_Q5_0 = "Q5_0" | |
| GGUF_Q5_K_M = "Q5_K_M" | |
| GGUF_Q5_K_S = "Q5_K_S" | |
| GGUF_Q6_K = "Q6_K" | |
| GGUF_Q8_0 = "Q8_0" | |
| GPTQ_4BIT = "GPTQ-4bit" | |
| GPTQ_8BIT = "GPTQ-8bit" | |
| AWQ_4BIT = "AWQ-4bit" | |
| BITSANDBYTES_4BIT = "bnb-4bit" | |
| BITSANDBYTES_8BIT = "bnb-8bit" | |
| INT8 = "INT8" | |
| INT4 = "INT4" | |
| CUSTOM = "custom" | |
| class FineTuneType(Enum): | |
| """Fine-tuning methods.""" | |
| NONE = "none" | |
| LORA = "lora" | |
| QLORA = "qlora" | |
| FULL = "full" | |
| RLHF = "rlhf" | |
| DPO = "dpo" | |
| ORPO = "orpo" | |
| CUSTOM = "custom" | |
@dataclass
class ModelVariant:
    """Describes how a model differs from its base.

    Fix: the ``@dataclass`` decorator was missing. Without it the class has
    no generated ``__init__`` (callers construct it with keyword arguments)
    and ``asdict(self)`` raises ``TypeError``.
    """

    # Quantization label, e.g. "Q4_K_M"; "none" means full precision.
    quantization: str = "none"
    # Weight container format (see ModelFormat values); "unknown" if undetected.
    format: str = "unknown"
    # Bit width implied by the quantization (4/5/6/8), if known.
    bits: Optional[int] = None
    # Who made this variant (e.g., "TheBloke"); None if unattributed.
    provider: Optional[str] = None

    def to_dict(self) -> dict:
        """Return a plain-dict snapshot suitable for JSON serialization."""
        return asdict(self)
@dataclass
class FineTuneInfo:
    """Describes fine-tuning applied to a model.

    Fix: the ``@dataclass`` decorator was missing; without it the generated
    ``__init__`` does not exist and ``asdict(self)`` raises ``TypeError``.
    """

    # Fine-tune method, one of the FineTuneType values; "none" for base models.
    type: str = "none"
    adapter_id: Optional[str] = None  # HuggingFace adapter ID
    adapter_hash: Optional[str] = None  # Hash of adapter weights
    base_model_root: Optional[str] = None  # Merkle root of base model identity
    dataset_id: Optional[str] = None  # Training dataset

    def to_dict(self) -> dict:
        """Return a plain-dict snapshot suitable for JSON serialization."""
        return asdict(self)
@dataclass
class BehavioralFingerprint:
    """
    Fingerprint for API models where weights are unavailable.
    Generated by running standard probes and hashing responses.

    Fix: the ``@dataclass`` decorator was missing. Without it
    ``field(default_factory=list)`` is just an inert class attribute,
    keyword construction fails, and ``asdict(self)`` raises ``TypeError``.
    """

    # One entry per probe: {"probe_id", "prompt_hash", "response_hash"}.
    probe_responses: List[Dict[str, Any]] = field(default_factory=list)
    # Hash over the full sorted response list — the comparable fingerprint.
    probe_hash: Optional[str] = None
    # Probe-set version; bump when STANDARD_PROBES change.
    fingerprint_version: int = 1
    # Unix timestamp when the fingerprint was generated.
    generated_at: Optional[float] = None

    def to_dict(self) -> dict:
        """Return a plain-dict snapshot suitable for JSON serialization."""
        return asdict(self)
@dataclass
class ModelIdentity:
    """
    Canonical identity for any AI model variant.
    This is the node that goes in the lattice.
    All observations of this model link to this identity.

    Fixes:
    - The ``@dataclass`` decorator was missing: keyword construction,
      ``field(default_factory=...)`` and ``__post_init__`` all depend on it.
    - ``to_chain_format`` had a precedence bug in ``model_hash``:
      ``a or b if cond else d`` parses as ``(a or b) if cond else d``, so
      ``weight_hash`` was discarded whenever no behavioral fingerprint was set.
    """

    # === Core Identity ===
    base_model: str  # HuggingFace ID or canonical name
    model_id: str  # Full unique identifier (computed in __post_init__ if empty)
    # === Variant Info ===
    variant: ModelVariant = field(default_factory=ModelVariant)
    fine_tune: FineTuneInfo = field(default_factory=FineTuneInfo)
    # === Cryptographic Identity ===
    weight_hash: Optional[str] = None  # SHA256 of weights (if available)
    config_hash: Optional[str] = None  # SHA256 of model config
    tokenizer_hash: Optional[str] = None  # SHA256 of tokenizer
    # === Behavioral Fingerprint (for APIs) ===
    behavioral_fingerprint: Optional[BehavioralFingerprint] = None
    # === Source Info ===
    source_url: Optional[str] = None
    source_revision: Optional[str] = None  # Git commit/tag
    downloaded_at: Optional[float] = None
    # === Lattice Info ===
    parent_root: Optional[str] = None  # Genesis or base model's merkle root
    merkle_root: Optional[str] = None  # This identity's merkle root
    created_at: float = field(default_factory=time.time)
    # === Metadata ===
    parameters: Optional[int] = None  # Parameter count
    context_length: Optional[int] = None
    architecture: Optional[str] = None  # "llama", "mistral", "gpt", etc.
    license: Optional[str] = None

    def __post_init__(self):
        """Compute derived fields after dataclass init."""
        if not self.model_id:
            self.model_id = self.compute_model_id()

    def compute_model_id(self) -> str:
        """
        Compute canonical model ID from components.
        Format: base_model::variant_spec::fine_tune_spec
        Only non-default components are appended, so a plain base model's
        ID is just its name.
        """
        parts = [self.base_model]
        # Add variant spec
        if self.variant.quantization != "none":
            parts.append(f"q:{self.variant.quantization}")
        if self.variant.format != "unknown":
            parts.append(f"fmt:{self.variant.format}")
        if self.variant.provider:
            parts.append(f"by:{self.variant.provider}")
        # Add fine-tune spec
        if self.fine_tune.type != "none":
            parts.append(f"ft:{self.fine_tune.type}")
            if self.fine_tune.adapter_id:
                parts.append(f"adapter:{self.fine_tune.adapter_id}")
        return "::".join(parts)

    def compute_merkle_root(self) -> str:
        """Compute (and store) the merkle root of this identity.

        The root is a truncated SHA256 over a canonical, key-sorted JSON
        rendering of the identity's stable fields. ``created_at`` is included,
        so two otherwise-identical identities created at different times get
        different roots.
        """
        # Create canonical representation
        canonical = {
            "base_model": self.base_model,
            "model_id": self.model_id,
            "variant": self.variant.to_dict(),
            "fine_tune": self.fine_tune.to_dict(),
            "weight_hash": self.weight_hash,
            "config_hash": self.config_hash,
            "tokenizer_hash": self.tokenizer_hash,
            "parent_root": self.parent_root,
            "created_at": self.created_at,
        }
        # Add behavioral fingerprint if present (only its summary hash)
        if self.behavioral_fingerprint:
            canonical["behavioral_fingerprint"] = self.behavioral_fingerprint.probe_hash
        # Hash it — truncated to 16 hex chars to match the lattice's root format
        canonical_json = json.dumps(canonical, sort_keys=True)
        self.merkle_root = hashlib.sha256(canonical_json.encode()).hexdigest()[:16]
        return self.merkle_root

    def finalize(self, parent_root: Optional[str] = None) -> "ModelIdentity":
        """Finalize identity and compute merkle root.

        Args:
            parent_root: Optional override for the parent link; falsy values
                leave the existing ``parent_root`` untouched.
        Returns:
            self, for fluent chaining.
        """
        if parent_root:
            self.parent_root = parent_root
        self.merkle_root = self.compute_merkle_root()
        return self

    def to_dict(self) -> dict:
        """Convert to dictionary for serialization (JSON-safe values only)."""
        return {
            "base_model": self.base_model,
            "model_id": self.model_id,
            "variant": self.variant.to_dict(),
            "fine_tune": self.fine_tune.to_dict(),
            "weight_hash": self.weight_hash,
            "config_hash": self.config_hash,
            "tokenizer_hash": self.tokenizer_hash,
            "behavioral_fingerprint": self.behavioral_fingerprint.to_dict() if self.behavioral_fingerprint else None,
            "source_url": self.source_url,
            "source_revision": self.source_revision,
            "downloaded_at": self.downloaded_at,
            "parent_root": self.parent_root,
            "merkle_root": self.merkle_root,
            "created_at": self.created_at,
            "parameters": self.parameters,
            "context_length": self.context_length,
            "architecture": self.architecture,
            "license": self.license,
        }

    def to_chain_format(self) -> dict:
        """Convert to provenance chain format for lattice storage.

        NOTE(review): the record schema (layer_name/state_hash/...) mirrors the
        lattice's provenance-chain format — confirm against the chain consumer.
        """
        # Prefer the weight hash; fall back to the behavioral probe hash,
        # then "unknown". (Previously `a or b if cond else d` precedence
        # dropped weight_hash whenever no fingerprint was present.)
        if self.weight_hash:
            model_hash = self.weight_hash
        elif self.behavioral_fingerprint and self.behavioral_fingerprint.probe_hash:
            model_hash = self.behavioral_fingerprint.probe_hash
        else:
            model_hash = "unknown"
        return {
            "session_id": f"model_identity_{self.merkle_root}",
            "model_id": self.model_id,
            "model_hash": model_hash,
            "input_hash": self.base_model,
            "output_hash": None,
            "records": {
                "identity": {
                    "layer_name": "identity",
                    "layer_idx": 0,
                    "state_hash": self.merkle_root,
                    "parent_hashes": [self.parent_root] if self.parent_root else [],
                    "params_hash": self.config_hash,
                    "shape": [self.parameters] if self.parameters else [0],
                    "dtype": "model_identity",
                    "stats": self.to_dict(),
                    "execution_order": 0,
                    "timestamp": self.created_at,
                }
            },
            "external_roots": [self.parent_root] if self.parent_root else [],
            "merkle_root": self.merkle_root,
            "created_at": self.created_at,
            "finalized": True,
        }
| # ============================================================================= | |
| # STANDARD PROBES FOR BEHAVIORAL FINGERPRINTING | |
| # ============================================================================= | |
| STANDARD_PROBES_V1 = [ | |
| # Deterministic probes (temperature=0) | |
| { | |
| "id": "math_simple", | |
| "prompt": "What is 2+2? Answer with just the number.", | |
| "params": {"temperature": 0, "max_tokens": 10}, | |
| }, | |
| { | |
| "id": "capital_france", | |
| "prompt": "Complete this sentence with one word: The capital of France is", | |
| "params": {"temperature": 0, "max_tokens": 10}, | |
| }, | |
| { | |
| "id": "translate_hello", | |
| "prompt": "Translate to French: Hello", | |
| "params": {"temperature": 0, "max_tokens": 20}, | |
| }, | |
| { | |
| "id": "color_sky", | |
| "prompt": "What color is the sky on a clear day? One word answer:", | |
| "params": {"temperature": 0, "max_tokens": 10}, | |
| }, | |
| # Capability probes | |
| { | |
| "id": "code_simple", | |
| "prompt": "Write a Python function that adds two numbers. Just the function, no explanation.", | |
| "params": {"temperature": 0, "max_tokens": 100}, | |
| }, | |
| { | |
| "id": "reasoning", | |
| "prompt": "If all cats are mammals and all mammals are animals, are all cats animals? Answer yes or no.", | |
| "params": {"temperature": 0, "max_tokens": 10}, | |
| }, | |
| # System prompt probe | |
| { | |
| "id": "system_role", | |
| "prompt": "You are a helpful pirate. Say hello.", | |
| "params": {"temperature": 0, "max_tokens": 50}, | |
| "system": "You are a helpful pirate who speaks like a pirate.", | |
| }, | |
| # Edge cases | |
| { | |
| "id": "empty", | |
| "prompt": "", | |
| "params": {"temperature": 0, "max_tokens": 50}, | |
| }, | |
| { | |
| "id": "repetition", | |
| "prompt": "Repeat after me exactly: The quick brown fox", | |
| "params": {"temperature": 0, "max_tokens": 20}, | |
| }, | |
| ] | |
def generate_behavioral_fingerprint(
    call_fn,  # Function that takes (prompt, params) and returns response
    probes: Optional[List[dict]] = None,
    version: int = 1,
) -> BehavioralFingerprint:
    """
    Generate behavioral fingerprint by running standard probes.

    Fix: ``probes`` was annotated ``List[dict] = None`` — an invalid
    implicit-Optional annotation; it is now ``Optional[List[dict]]``.

    Args:
        call_fn: Function to call the model. Signature: (prompt, params) -> str
        probes: List of probe configs. Defaults to STANDARD_PROBES_V1.
        version: Fingerprint version number.
    Returns:
        BehavioralFingerprint with hashed responses.
    """
    if probes is None:
        probes = STANDARD_PROBES_V1
    responses = []
    for probe in probes:
        try:
            response = call_fn(probe["prompt"], probe.get("params", {}))
            response_hash = hashlib.sha256(str(response).encode()).hexdigest()[:16]
        except Exception as e:
            # Record the failure class instead of aborting, so fingerprints
            # stay comparable even when an individual probe errors out.
            response_hash = f"error:{type(e).__name__}"
        responses.append({
            "probe_id": probe["id"],
            "prompt_hash": hashlib.sha256(probe["prompt"].encode()).hexdigest()[:16],
            "response_hash": response_hash,
        })
    # Compute overall fingerprint hash over the canonical response list.
    fingerprint_data = json.dumps(responses, sort_keys=True)
    probe_hash = hashlib.sha256(fingerprint_data.encode()).hexdigest()[:16]
    return BehavioralFingerprint(
        probe_responses=responses,
        probe_hash=probe_hash,
        fingerprint_version=version,
        generated_at=time.time(),
    )
| # ============================================================================= | |
| # MODEL IDENTITY FACTORY | |
| # ============================================================================= | |
def detect_quantization(model_path: str) -> str:
    """Detect quantization from a model path or name.

    Fix: the old GPTQ branch also matched the shortcuts "-4b"/"-8b", which
    collide with parameter-count suffixes — "Llama-3-8B-GPTQ" lowercases to
    "llama-3-8b-gptq", contains "-8b", and was mislabelled "GPTQ-8bit".
    Only the explicit "4bit"/"8bit" markers are trusted now; an unmarked
    GPTQ repo yields the generic "GPTQ".

    Args:
        model_path: HuggingFace ID, file path, or model name.
    Returns:
        A quantization label matching QuantizationType values
        (e.g. "Q4_K_M", "GPTQ-4bit", "bnb-8bit"), or "none".
    """
    path_lower = model_path.lower()
    # GGUF quantizations — first substring match wins.
    for q in ["q4_k_m", "q4_k_s", "q4_0", "q5_k_m", "q5_k_s", "q5_0", "q6_k", "q8_0"]:
        if q in path_lower:
            return q.upper()
    # GPTQ
    if "gptq" in path_lower:
        if "4bit" in path_lower:
            return "GPTQ-4bit"
        if "8bit" in path_lower:
            return "GPTQ-8bit"
        return "GPTQ"
    # AWQ is 4-bit in practice
    if "awq" in path_lower:
        return "AWQ-4bit"
    # BitsAndBytes — defaults to 8-bit unless 4-bit is stated
    if "bnb" in path_lower or "bitsandbytes" in path_lower:
        if "4bit" in path_lower:
            return "bnb-4bit"
        return "bnb-8bit"
    return "none"
def detect_format(model_path: str) -> str:
    """Detect model format from a path or name.

    Returns one of the ModelFormat string values ("gguf", "ggml",
    "safetensors", "onnx", "pytorch", "api") or "unknown".
    """
    lowered = model_path.lower()
    # Ordered marker table: the first group with any matching substring wins,
    # so specific extensions take precedence over generic name hints.
    marker_table = (
        ((".gguf",), "gguf"),
        ((".ggml",), "ggml"),
        (("safetensors",), "safetensors"),
        ((".onnx",), "onnx"),
        ((".bin", "pytorch"), "pytorch"),
        (("api", "http"), "api"),
    )
    for markers, fmt in marker_table:
        if any(marker in lowered for marker in markers):
            return fmt
    return "unknown"
def detect_provider(model_path: str) -> Optional[str]:
    """Detect who made this variant.

    Scans the lowercased path for a fixed list of well-known quantizers /
    re-packagers and returns the first match, or None if nobody is recognized.
    """
    lowered = model_path.lower()
    known_providers = (
        "thebloke",
        "unsloth",
        "mlx-community",
        "bartowski",
        "mradermacher",
        "turboderp",
    )
    return next((name for name in known_providers if name in lowered), None)
def create_model_identity(
    model_id: str,
    weights_path: Optional[Path] = None,
    config: Optional[dict] = None,
    parent_root: Optional[str] = None,
    behavioral_fingerprint: Optional[BehavioralFingerprint] = None,
    **kwargs,
) -> ModelIdentity:
    """
    Factory function to create ModelIdentity from various inputs.
    Args:
        model_id: HuggingFace model ID or local path
        weights_path: Path to weights file (for hashing)
        config: Model config dict
        parent_root: Merkle root of parent (genesis or base model)
        behavioral_fingerprint: Pre-computed fingerprint for APIs
        **kwargs: Additional fields (parameters, context_length, etc.);
            may also carry "base_model" to skip the heuristic below.
            Unrecognized keys are forwarded to ModelIdentity(...).
    Returns:
        Finalized ModelIdentity ready for lattice
    """
    # Parse base model from full ID
    # e.g., "TheBloke/Llama-3-8B-GGUF" -> base is "meta-llama/Llama-3-8B"
    base_model = kwargs.pop("base_model", None)
    if not base_model:
        # Try to extract base from model_id: take the repo name (last path
        # segment) and strip known variant suffixes.
        # NOTE(review): suffix stripping is case-sensitive ("-gguf" survives)
        # and the org prefix is dropped, so the heuristic base is just the
        # bare name — confirm this matches how bases are registered.
        parts = model_id.split("/")
        if len(parts) >= 2:
            name = parts[-1]
            # Remove common suffixes
            for suffix in ["-GGUF", "-GPTQ", "-AWQ", "-fp16", "-bf16", "-GGML"]:
                name = name.replace(suffix, "")
            base_model = name
        else:
            base_model = model_id
    # Detect variant info from the raw model_id string
    quantization = detect_quantization(model_id)
    format_type = detect_format(model_id)
    provider = detect_provider(model_id)
    # Extract bits from the quantization label by digit sniffing
    # (e.g. "Q4_K_M" -> 4, "GPTQ-8bit" -> 8); "none" has no digits -> None.
    bits = None
    if "4" in quantization:
        bits = 4
    elif "5" in quantization:
        bits = 5
    elif "6" in quantization:
        bits = 6
    elif "8" in quantization:
        bits = 8
    variant = ModelVariant(
        quantization=quantization,
        format=format_type,
        bits=bits,
        provider=provider,
    )
    # Hash weights if available
    weight_hash = None
    if weights_path and Path(weights_path).exists():
        # For large files, hash first and last 1MB + size — a cheap
        # approximation instead of reading multi-GB weights in full.
        path = Path(weights_path)
        size = path.stat().st_size
        hasher = hashlib.sha256()
        hasher.update(str(size).encode())
        with open(path, "rb") as f:
            # First 1MB
            hasher.update(f.read(1024 * 1024))
            # Last 1MB (only when the file is big enough that the two
            # windows don't overlap)
            if size > 2 * 1024 * 1024:
                f.seek(-1024 * 1024, 2)
                hasher.update(f.read())
        weight_hash = hasher.hexdigest()[:16]
    # Hash config if available (canonical key-sorted JSON)
    config_hash = None
    if config:
        config_json = json.dumps(config, sort_keys=True)
        config_hash = hashlib.sha256(config_json.encode()).hexdigest()[:16]
    # Create identity
    identity = ModelIdentity(
        base_model=base_model,
        model_id="",  # Will be computed
        variant=variant,
        fine_tune=FineTuneInfo(),
        weight_hash=weight_hash,
        config_hash=config_hash,
        behavioral_fingerprint=behavioral_fingerprint,
        parent_root=parent_root,
        **kwargs,
    )
    # Compute model_id and merkle_root
    identity.model_id = identity.compute_model_id()
    identity.finalize(parent_root)
    return identity
| # ============================================================================= | |
| # MODEL REGISTRY (Lattice Integration) | |
| # ============================================================================= | |
class ModelRegistry:
    """
    Registry of model identities in the lattice.
    Provides:
    - Get or create model identity
    - Link observations to model identities
    - Query models by various criteria

    Identities are persisted as one JSON file per model under
    ``<lattice_dir>/models/<merkle_root>.json`` and cached in memory,
    keyed by merkle root.
    """

    def __init__(self, lattice_dir: Optional[Path] = None, genesis_root: Optional[str] = None):
        # Default layout: a "lattice" directory two levels above this file.
        self.lattice_dir = lattice_dir or Path(__file__).parent.parent / "lattice"
        self.models_dir = self.lattice_dir / "models"
        self.models_dir.mkdir(parents=True, exist_ok=True)
        # Genesis root (models link to this if no base model)
        # NOTE(review): hard-coded hex default — presumably the lattice's
        # genesis merkle root; confirm it matches the deployed lattice.
        self.genesis_root = genesis_root or "89f940c1a4b7aa65"
        # Cache of loaded identities, keyed by merkle_root.
        self._cache: Dict[str, ModelIdentity] = {}
        self._load_all()

    def _load_all(self) -> None:
        """Load all model identities from disk into the in-memory cache."""
        for json_file in self.models_dir.glob("*.json"):
            try:
                data = json.loads(json_file.read_text())
                identity = self._dict_to_identity(data)
                self._cache[identity.merkle_root] = identity
            except Exception as e:
                # Best-effort load: a single corrupt file must not prevent
                # the rest of the registry from loading.
                print(f"Error loading {json_file}: {e}")

    def _dict_to_identity(self, data: dict) -> ModelIdentity:
        """Convert a serialized dict (from to_dict) back to ModelIdentity."""
        variant_data = data.get("variant", {})
        fine_tune_data = data.get("fine_tune", {})
        fingerprint_data = data.get("behavioral_fingerprint")
        return ModelIdentity(
            base_model=data["base_model"],
            model_id=data["model_id"],
            variant=ModelVariant(**variant_data),
            fine_tune=FineTuneInfo(**fine_tune_data),
            weight_hash=data.get("weight_hash"),
            config_hash=data.get("config_hash"),
            tokenizer_hash=data.get("tokenizer_hash"),
            behavioral_fingerprint=BehavioralFingerprint(**fingerprint_data) if fingerprint_data else None,
            source_url=data.get("source_url"),
            source_revision=data.get("source_revision"),
            downloaded_at=data.get("downloaded_at"),
            parent_root=data.get("parent_root"),
            merkle_root=data.get("merkle_root"),
            created_at=data.get("created_at", time.time()),
            parameters=data.get("parameters"),
            context_length=data.get("context_length"),
            architecture=data.get("architecture"),
            license=data.get("license"),
        )

    def _save_identity(self, identity: ModelIdentity) -> None:
        """Save identity to disk as <merkle_root>.json."""
        filename = f"{identity.merkle_root}.json"
        filepath = self.models_dir / filename
        filepath.write_text(json.dumps(identity.to_dict(), indent=2))

    def get_or_create(
        self,
        model_id: str,
        **kwargs,
    ) -> ModelIdentity:
        """
        Get existing model identity or create new one.
        If model already exists in registry, returns existing.
        Otherwise creates new identity linked to genesis or base model.

        NOTE(review): the lookup matches on model_id OR base_model, so
        requesting a base name can return an arbitrary cached variant of
        that base (cache iteration order) — confirm this is intended.
        """
        # Check if we have this model already
        for identity in self._cache.values():
            if identity.model_id == model_id or identity.base_model == model_id:
                return identity
        # Determine parent
        # If this is a variant, try to find base model
        parent_root = kwargs.pop("parent_root", None)
        if not parent_root:
            base = kwargs.get("base_model")
            if base:
                # An unquantized identity of the same base acts as parent.
                for identity in self._cache.values():
                    if identity.base_model == base and identity.variant.quantization == "none":
                        parent_root = identity.merkle_root
                        break
        # Default to genesis
        if not parent_root:
            parent_root = self.genesis_root
        # Create new identity
        identity = create_model_identity(
            model_id=model_id,
            parent_root=parent_root,
            **kwargs,
        )
        # Cache and save
        self._cache[identity.merkle_root] = identity
        self._save_identity(identity)
        return identity

    def get_by_root(self, merkle_root: str) -> Optional[ModelIdentity]:
        """Get model identity by merkle root, or None if unknown."""
        return self._cache.get(merkle_root)

    def list_all(self) -> List[ModelIdentity]:
        """List all registered models."""
        return list(self._cache.values())

    def list_by_base(self, base_model: str) -> List[ModelIdentity]:
        """List all variants of a base model (exact base_model match)."""
        return [i for i in self._cache.values() if i.base_model == base_model]

    def search(self, query: str) -> List[ModelIdentity]:
        """Search models by case-insensitive substring of model_id or base_model."""
        query_lower = query.lower()
        return [
            i for i in self._cache.values()
            if query_lower in i.model_id.lower() or query_lower in i.base_model.lower()
        ]
| # ============================================================================= | |
| # CLI | |
| # ============================================================================= | |
| if __name__ == "__main__": | |
| import sys | |
| # Test: Create some model identities | |
| print("=== CASCADE Model Identity Layer ===\n") | |
| # Initialize registry | |
| registry = ModelRegistry() | |
| # Create some test identities | |
| test_models = [ | |
| "meta-llama/Llama-3-8B", | |
| "TheBloke/Llama-3-8B-GGUF", | |
| "unsloth/Llama-3-8B-bnb-4bit", | |
| "anthropic/claude-3-opus", | |
| "openai/gpt-4", | |
| ] | |
| for model in test_models: | |
| identity = registry.get_or_create(model) | |
| print(f"Model: {identity.model_id}") | |
| print(f" Base: {identity.base_model}") | |
| print(f" Quant: {identity.variant.quantization}") | |
| print(f" Format: {identity.variant.format}") | |
| print(f" Merkle: {identity.merkle_root}") | |
| print(f" Parent: {identity.parent_root}") | |
| print() | |
| print(f"Total models in registry: {len(registry.list_all())}") | |