# cascade/identity.py
# tostido — Initial commit, cascade-lattice 0.5.4 (77bcbf1)
"""
CASCADE Model Identity Layer
Canonical identification for any AI model variant:
- Base models (meta-llama/Llama-3-8B)
- Quantizations (Q4_K_M, Q8_0, AWQ, GPTQ)
- Fine-tunes (LoRA, full, RLHF)
- API endpoints (behavioral fingerprinting)
Every unique model gets a node in the lattice.
Every observation links to its model's node.
The lattice becomes the collective memory of AI behavior.
"Same name, different model, different behavior."
"""
import hashlib
import json
import time
from pathlib import Path
from dataclasses import dataclass, field, asdict
from typing import Optional, List, Dict, Any
from enum import Enum
class ModelFormat(Enum):
    """Model weight formats.

    Identifies how a model's weights are stored on disk. Values are the
    lowercase strings that `detect_format` returns, so the two must stay
    in sync.
    """
    SAFETENSORS = "safetensors"
    PYTORCH = "pytorch"
    GGUF = "gguf"
    GGML = "ggml"
    ONNX = "onnx"
    TENSORRT = "tensorrt"
    OPENVINO = "openvino"
    COREML = "coreml"
    API = "api"  # No weights, just endpoint
    UNKNOWN = "unknown"
class QuantizationType(Enum):
    """Quantization methods.

    Values mirror the strings produced by `detect_quantization` and stored
    in `ModelVariant.quantization`.
    """
    NONE = "none"  # FP32/FP16/BF16 (unquantized)
    # llama.cpp / GGUF quantization schemes
    GGUF_Q4_0 = "Q4_0"
    GGUF_Q4_K_M = "Q4_K_M"
    GGUF_Q4_K_S = "Q4_K_S"
    GGUF_Q5_0 = "Q5_0"
    GGUF_Q5_K_M = "Q5_K_M"
    GGUF_Q5_K_S = "Q5_K_S"
    GGUF_Q6_K = "Q6_K"
    GGUF_Q8_0 = "Q8_0"
    # Post-training quantization toolkits
    GPTQ_4BIT = "GPTQ-4bit"
    GPTQ_8BIT = "GPTQ-8bit"
    AWQ_4BIT = "AWQ-4bit"
    BITSANDBYTES_4BIT = "bnb-4bit"
    BITSANDBYTES_8BIT = "bnb-8bit"
    # Generic integer quantization
    INT8 = "INT8"
    INT4 = "INT4"
    CUSTOM = "custom"
class FineTuneType(Enum):
    """Fine-tuning methods.

    Values are the strings stored in `FineTuneInfo.type`.
    """
    NONE = "none"
    LORA = "lora"    # Low-rank adapter
    QLORA = "qlora"  # Quantized LoRA
    FULL = "full"    # Full-parameter fine-tune
    RLHF = "rlhf"    # Reinforcement learning from human feedback
    DPO = "dpo"      # Direct preference optimization
    ORPO = "orpo"    # Odds-ratio preference optimization
    CUSTOM = "custom"
@dataclass
class ModelVariant:
    """Describes how a model differs from its base.

    All fields are plain strings/ints so the record serializes directly
    with `dataclasses.asdict`.
    """
    quantization: str = "none"      # typically a QuantizationType value, e.g. "Q4_K_M"
    format: str = "unknown"         # typically a ModelFormat value, e.g. "gguf"
    bits: Optional[int] = None      # effective bit width, if known
    provider: Optional[str] = None  # Who made this variant (e.g., "TheBloke")

    def to_dict(self) -> dict:
        """Return a plain-dict copy suitable for JSON serialization."""
        return asdict(self)
@dataclass
class FineTuneInfo:
    """Describes fine-tuning applied to a model.

    The default instance (type="none", everything else None) means the
    model has not been fine-tuned.
    """
    type: str = "none"                      # typically a FineTuneType value
    adapter_id: Optional[str] = None        # HuggingFace adapter ID
    adapter_hash: Optional[str] = None      # Hash of adapter weights
    base_model_root: Optional[str] = None   # Merkle root of base model identity
    dataset_id: Optional[str] = None        # Training dataset

    def to_dict(self) -> dict:
        """Return a plain-dict copy suitable for JSON serialization."""
        return asdict(self)
@dataclass
class BehavioralFingerprint:
    """
    Fingerprint for API models where weights are unavailable.
    Generated by running standard probes and hashing responses.

    See `generate_behavioral_fingerprint` for the producer of this record.
    """
    # One dict per probe: {"probe_id", "prompt_hash", "response_hash"}
    probe_responses: List[Dict[str, Any]] = field(default_factory=list)
    # Truncated SHA256 over the serialized probe_responses
    probe_hash: Optional[str] = None
    # Bump when the probe set changes, so fingerprints stay comparable
    fingerprint_version: int = 1
    generated_at: Optional[float] = None  # unix timestamp

    def to_dict(self) -> dict:
        """Return a plain-dict copy suitable for JSON serialization."""
        return asdict(self)
@dataclass
class ModelIdentity:
    """
    Canonical identity for any AI model variant.

    This is the node that goes in the lattice. All observations of this
    model link to this identity.
    """
    # === Core Identity ===
    base_model: str  # HuggingFace ID or canonical name
    model_id: str    # Full unique identifier (computed in __post_init__ if empty)
    # === Variant Info ===
    variant: ModelVariant = field(default_factory=ModelVariant)
    fine_tune: FineTuneInfo = field(default_factory=FineTuneInfo)
    # === Cryptographic Identity ===
    weight_hash: Optional[str] = None     # SHA256 of weights (if available)
    config_hash: Optional[str] = None     # SHA256 of model config
    tokenizer_hash: Optional[str] = None  # SHA256 of tokenizer
    # === Behavioral Fingerprint (for APIs) ===
    behavioral_fingerprint: Optional[BehavioralFingerprint] = None
    # === Source Info ===
    source_url: Optional[str] = None
    source_revision: Optional[str] = None  # Git commit/tag
    downloaded_at: Optional[float] = None
    # === Lattice Info ===
    parent_root: Optional[str] = None  # Genesis or base model's merkle root
    merkle_root: Optional[str] = None  # This identity's merkle root
    created_at: float = field(default_factory=time.time)
    # === Metadata ===
    parameters: Optional[int] = None  # Parameter count
    context_length: Optional[int] = None
    architecture: Optional[str] = None  # "llama", "mistral", "gpt", etc.
    license: Optional[str] = None

    def __post_init__(self):
        """Compute derived fields (model_id) when not supplied."""
        if not self.model_id:
            self.model_id = self.compute_model_id()

    def compute_model_id(self) -> str:
        """
        Compute canonical model ID from components.

        Format: base_model::variant_spec::fine_tune_spec
        Default-valued components ("none"/"unknown"/None) are omitted, so a
        plain base model's ID is just its base_model string.
        """
        parts = [self.base_model]
        # Add variant spec
        if self.variant.quantization != "none":
            parts.append(f"q:{self.variant.quantization}")
        if self.variant.format != "unknown":
            parts.append(f"fmt:{self.variant.format}")
        if self.variant.provider:
            parts.append(f"by:{self.variant.provider}")
        # Add fine-tune spec
        if self.fine_tune.type != "none":
            parts.append(f"ft:{self.fine_tune.type}")
        if self.fine_tune.adapter_id:
            parts.append(f"adapter:{self.fine_tune.adapter_id}")
        return "::".join(parts)

    def compute_merkle_root(self) -> str:
        """Compute, store, and return the merkle root of this identity.

        The root is a SHA256 over a canonical (sort_keys) JSON encoding of
        the identity's stable fields, truncated to 16 hex chars for compact
        IDs. Note that `created_at` is included, so two identities created
        at different times get different roots even if otherwise identical.
        """
        # Create canonical representation
        canonical = {
            "base_model": self.base_model,
            "model_id": self.model_id,
            "variant": self.variant.to_dict(),
            "fine_tune": self.fine_tune.to_dict(),
            "weight_hash": self.weight_hash,
            "config_hash": self.config_hash,
            "tokenizer_hash": self.tokenizer_hash,
            "parent_root": self.parent_root,
            "created_at": self.created_at,
        }
        # Add behavioral fingerprint if present (only the summary hash,
        # not the full probe list)
        if self.behavioral_fingerprint:
            canonical["behavioral_fingerprint"] = self.behavioral_fingerprint.probe_hash
        # Hash it
        canonical_json = json.dumps(canonical, sort_keys=True)
        self.merkle_root = hashlib.sha256(canonical_json.encode()).hexdigest()[:16]
        return self.merkle_root

    def finalize(self, parent_root: Optional[str] = None):
        """Finalize identity and compute merkle root.

        Args:
            parent_root: If given, overrides self.parent_root before hashing.

        Returns:
            self, for call chaining.
        """
        if parent_root:
            self.parent_root = parent_root
        self.merkle_root = self.compute_merkle_root()
        return self

    def to_dict(self) -> dict:
        """Convert to dictionary for serialization."""
        return {
            "base_model": self.base_model,
            "model_id": self.model_id,
            "variant": self.variant.to_dict(),
            "fine_tune": self.fine_tune.to_dict(),
            "weight_hash": self.weight_hash,
            "config_hash": self.config_hash,
            "tokenizer_hash": self.tokenizer_hash,
            "behavioral_fingerprint": self.behavioral_fingerprint.to_dict() if self.behavioral_fingerprint else None,
            "source_url": self.source_url,
            "source_revision": self.source_revision,
            "downloaded_at": self.downloaded_at,
            "parent_root": self.parent_root,
            "merkle_root": self.merkle_root,
            "created_at": self.created_at,
            "parameters": self.parameters,
            "context_length": self.context_length,
            "architecture": self.architecture,
            "license": self.license,
        }

    def to_chain_format(self) -> dict:
        """Convert to provenance chain format for lattice storage."""
        # Prefer the cryptographic weight hash; fall back to the behavioral
        # probe hash, then "unknown".
        # BUG FIX: the previous one-liner
        #   weight_hash or probe_hash if fingerprint else "unknown"
        # parsed as (weight_hash or probe_hash) if fingerprint else "unknown",
        # which discarded a valid weight_hash whenever no fingerprint existed.
        if self.weight_hash:
            model_hash = self.weight_hash
        elif self.behavioral_fingerprint and self.behavioral_fingerprint.probe_hash:
            model_hash = self.behavioral_fingerprint.probe_hash
        else:
            model_hash = "unknown"
        return {
            "session_id": f"model_identity_{self.merkle_root}",
            "model_id": self.model_id,
            "model_hash": model_hash,
            "input_hash": self.base_model,
            "output_hash": None,
            "records": {
                "identity": {
                    "layer_name": "identity",
                    "layer_idx": 0,
                    "state_hash": self.merkle_root,
                    "parent_hashes": [self.parent_root] if self.parent_root else [],
                    "params_hash": self.config_hash,
                    "shape": [self.parameters] if self.parameters else [0],
                    "dtype": "model_identity",
                    "stats": self.to_dict(),
                    "execution_order": 0,
                    "timestamp": self.created_at,
                }
            },
            "external_roots": [self.parent_root] if self.parent_root else [],
            "merkle_root": self.merkle_root,
            "created_at": self.created_at,
            "finalized": True,
        }
# =============================================================================
# STANDARD PROBES FOR BEHAVIORAL FINGERPRINTING
# =============================================================================
# Version-1 probe set for behavioral fingerprinting. Each entry supplies a
# prompt plus generation params for `generate_behavioral_fingerprint`; the
# optional "system" key carries a system prompt. All probes use
# temperature=0 so responses (and thus hashes) are as repeatable as the
# backend allows. Changing this list changes fingerprints — add a new
# versioned list instead of editing in place.
STANDARD_PROBES_V1 = [
    # Deterministic probes (temperature=0)
    {
        "id": "math_simple",
        "prompt": "What is 2+2? Answer with just the number.",
        "params": {"temperature": 0, "max_tokens": 10},
    },
    {
        "id": "capital_france",
        "prompt": "Complete this sentence with one word: The capital of France is",
        "params": {"temperature": 0, "max_tokens": 10},
    },
    {
        "id": "translate_hello",
        "prompt": "Translate to French: Hello",
        "params": {"temperature": 0, "max_tokens": 20},
    },
    {
        "id": "color_sky",
        "prompt": "What color is the sky on a clear day? One word answer:",
        "params": {"temperature": 0, "max_tokens": 10},
    },
    # Capability probes
    {
        "id": "code_simple",
        "prompt": "Write a Python function that adds two numbers. Just the function, no explanation.",
        "params": {"temperature": 0, "max_tokens": 100},
    },
    {
        "id": "reasoning",
        "prompt": "If all cats are mammals and all mammals are animals, are all cats animals? Answer yes or no.",
        "params": {"temperature": 0, "max_tokens": 10},
    },
    # System prompt probe
    {
        "id": "system_role",
        "prompt": "You are a helpful pirate. Say hello.",
        "params": {"temperature": 0, "max_tokens": 50},
        "system": "You are a helpful pirate who speaks like a pirate.",
    },
    # Edge cases
    {
        "id": "empty",
        "prompt": "",
        "params": {"temperature": 0, "max_tokens": 50},
    },
    {
        "id": "repetition",
        "prompt": "Repeat after me exactly: The quick brown fox",
        "params": {"temperature": 0, "max_tokens": 20},
    },
]
def generate_behavioral_fingerprint(
    call_fn,  # Function that takes (prompt, params) and returns response
    probes: List[dict] = None,
    version: int = 1,
) -> BehavioralFingerprint:
    """
    Generate behavioral fingerprint by running standard probes.

    Args:
        call_fn: Function to call the model. Signature: (prompt, params) -> str
        probes: List of probe configs. Defaults to STANDARD_PROBES_V1.
        version: Fingerprint version number.

    Returns:
        BehavioralFingerprint with hashed responses. A probe that raises is
        recorded as "error:<ExceptionName>" rather than aborting the run.
    """
    probe_set = STANDARD_PROBES_V1 if probes is None else probes
    results = []
    for spec in probe_set:
        prompt = spec["prompt"]
        try:
            reply = call_fn(prompt, spec.get("params", {}))
        except Exception as exc:
            # Fold the failure into the fingerprint instead of raising
            digest = f"error:{type(exc).__name__}"
        else:
            digest = hashlib.sha256(str(reply).encode()).hexdigest()[:16]
        results.append({
            "probe_id": spec["id"],
            "prompt_hash": hashlib.sha256(prompt.encode()).hexdigest()[:16],
            "response_hash": digest,
        })
    # Overall fingerprint hash over the canonical serialized results
    combined = hashlib.sha256(
        json.dumps(results, sort_keys=True).encode()
    ).hexdigest()[:16]
    return BehavioralFingerprint(
        probe_responses=results,
        probe_hash=combined,
        fingerprint_version=version,
        generated_at=time.time(),
    )
# =============================================================================
# MODEL IDENTITY FACTORY
# =============================================================================
def detect_quantization(model_path: str) -> str:
    """Detect quantization from model path or name.

    Args:
        model_path: HuggingFace ID, file path, or model name.

    Returns:
        A QuantizationType-style string (e.g. "Q4_K_M", "GPTQ-4bit",
        "AWQ-4bit", "bnb-4bit"), bare "GPTQ" when the bit width is
        unmarked, or "none" when nothing recognizable is found.
    """
    path_lower = model_path.lower()
    # GGUF quantizations: K-variants are checked before the plain ones so
    # a longer pattern is never shadowed by a shorter one.
    for q in ["q4_k_m", "q4_k_s", "q4_0", "q5_k_m", "q5_k_s", "q5_0", "q6_k", "q8_0"]:
        if q in path_lower:
            return q.upper()
    # GPTQ: only trust an explicit "4bit"/"8bit" marker.
    # BUG FIX: the old "-4b"/"-8b" shorthand collided with parameter-count
    # tokens, so "TheBloke/Llama-3-8B-GPTQ" was misdetected as 8-bit.
    if "gptq" in path_lower:
        if "4bit" in path_lower:
            return "GPTQ-4bit"
        if "8bit" in path_lower:
            return "GPTQ-8bit"
        return "GPTQ"
    # AWQ is 4-bit in practice
    if "awq" in path_lower:
        return "AWQ-4bit"
    # BitsAndBytes: 4-bit when marked, otherwise assume 8-bit
    if "bnb" in path_lower or "bitsandbytes" in path_lower:
        if "4bit" in path_lower:
            return "bnb-4bit"
        return "bnb-8bit"
    return "none"
def detect_format(model_path: str) -> str:
    """Detect model format from path.

    Returns the first matching ModelFormat-style string, or "unknown".
    """
    lowered = model_path.lower()
    # Ordered marker table: extension-style markers are tested before the
    # looser substring heuristics, and the first hit wins.
    marker_table = (
        ("gguf", (".gguf",)),
        ("ggml", (".ggml",)),
        ("safetensors", (".safetensors", "safetensors")),
        ("onnx", (".onnx",)),
        ("pytorch", (".bin", "pytorch")),
        ("api", ("api", "http")),
    )
    for fmt, markers in marker_table:
        if any(marker in lowered for marker in markers):
            return fmt
    return "unknown"
def detect_provider(model_path: str) -> Optional[str]:
    """Detect who made this variant.

    Matches against a fixed list of well-known re-packagers; returns the
    first (lowercase) match, or None when no known provider appears.
    """
    lowered = model_path.lower()
    known_providers = (
        "thebloke",
        "unsloth",
        "mlx-community",
        "bartowski",
        "mradermacher",
        "turboderp",
    )
    return next((name for name in known_providers if name in lowered), None)
def create_model_identity(
    model_id: str,
    weights_path: Optional[Path] = None,
    config: Optional[dict] = None,
    parent_root: Optional[str] = None,
    behavioral_fingerprint: Optional[BehavioralFingerprint] = None,
    **kwargs,
) -> ModelIdentity:
    """
    Factory function to create ModelIdentity from various inputs.

    Args:
        model_id: HuggingFace model ID or local path
        weights_path: Path to weights file (for hashing)
        config: Model config dict
        parent_root: Merkle root of parent (genesis or base model)
        behavioral_fingerprint: Pre-computed fingerprint for APIs
        **kwargs: Additional fields (base_model, parameters, context_length, etc.)

    Returns:
        Finalized ModelIdentity ready for lattice
    """
    # Parse base model from full ID when not given explicitly.
    # Note: the org prefix is dropped, e.g.
    # "TheBloke/Llama-3-8B-GGUF" -> base is "Llama-3-8B"
    base_model = kwargs.pop("base_model", None)
    if not base_model:
        parts = model_id.split("/")
        if len(parts) >= 2:
            name = parts[-1]
            # Strip common variant suffixes (removed anywhere in the name,
            # so "...-GGUF-v2" is also caught)
            for suffix in ["-GGUF", "-GPTQ", "-AWQ", "-fp16", "-bf16", "-GGML"]:
                name = name.replace(suffix, "")
            base_model = name
        else:
            base_model = model_id

    # Detect variant info from the name alone
    quantization = detect_quantization(model_id)
    format_type = detect_format(model_id)
    provider = detect_provider(model_id)

    # Extract bits from the quantization label (first matching digit wins)
    bits = None
    if "4" in quantization:
        bits = 4
    elif "5" in quantization:
        bits = 5
    elif "6" in quantization:
        bits = 6
    elif "8" in quantization:
        bits = 8
    variant = ModelVariant(
        quantization=quantization,
        format=format_type,
        bits=bits,
        provider=provider,
    )

    # Hash weights if available. For large files, hash size + first 1MB +
    # last 1MB as a cheap content fingerprint.
    weight_hash = None
    if weights_path and Path(weights_path).exists():
        path = Path(weights_path)
        size = path.stat().st_size
        hasher = hashlib.sha256()
        hasher.update(str(size).encode())
        with open(path, "rb") as f:
            # First 1MB
            hasher.update(f.read(1024 * 1024))
            if size > 2 * 1024 * 1024:
                # Large file: skip the middle and hash only the last 1MB
                f.seek(-1024 * 1024, 2)
            # BUG FIX: previously files between 1MB and 2MB had their tail
            # silently excluded (the seek branch only fired above 2MB and
            # nothing was read otherwise). Reading the remainder here fully
            # hashes small files and leaves >2MB hashes unchanged.
            hasher.update(f.read())
        weight_hash = hasher.hexdigest()[:16]

    # Hash config if available
    config_hash = None
    if config:
        config_json = json.dumps(config, sort_keys=True)
        config_hash = hashlib.sha256(config_json.encode()).hexdigest()[:16]

    # Create identity
    identity = ModelIdentity(
        base_model=base_model,
        model_id="",  # Will be computed
        variant=variant,
        fine_tune=FineTuneInfo(),
        weight_hash=weight_hash,
        config_hash=config_hash,
        behavioral_fingerprint=behavioral_fingerprint,
        parent_root=parent_root,
        **kwargs,
    )
    # Compute model_id and merkle_root
    identity.model_id = identity.compute_model_id()
    identity.finalize(parent_root)
    return identity
# =============================================================================
# MODEL REGISTRY (Lattice Integration)
# =============================================================================
class ModelRegistry:
    """
    Registry of model identities in the lattice.

    Persists one JSON file per identity under ``<lattice_dir>/models`` and
    keeps an in-memory cache keyed by merkle root.

    Provides:
    - Get or create model identity
    - Link observations to model identities
    - Query models by various criteria
    """

    def __init__(self, lattice_dir: Path = None, genesis_root: str = None):
        """Set up storage directories and load all stored identities.

        Args:
            lattice_dir: Lattice root; defaults to ``../lattice`` relative
                to this file. Created on demand.
            genesis_root: Parent merkle root for models with no base model.
        """
        self.lattice_dir = lattice_dir or Path(__file__).parent.parent / "lattice"
        self.models_dir = self.lattice_dir / "models"
        self.models_dir.mkdir(parents=True, exist_ok=True)
        # Genesis root (models link to this if no base model)
        # NOTE(review): hard-coded fallback looks like a well-known genesis
        # hash — confirm it matches the deployed lattice.
        self.genesis_root = genesis_root or "89f940c1a4b7aa65"
        # Cache of loaded identities, keyed by merkle root
        self._cache: Dict[str, ModelIdentity] = {}
        self._load_all()

    def _load_all(self):
        """Load all model identities from disk into the cache."""
        for json_file in self.models_dir.glob("*.json"):
            try:
                data = json.loads(json_file.read_text())
                identity = self._dict_to_identity(data)
                self._cache[identity.merkle_root] = identity
            except Exception as e:
                # Best-effort load: a corrupt/unreadable file is reported
                # and skipped rather than aborting startup.
                print(f"Error loading {json_file}: {e}")

    def _dict_to_identity(self, data: dict) -> ModelIdentity:
        """Convert dict (as produced by ModelIdentity.to_dict) back to ModelIdentity.

        Raises KeyError if the required base_model/model_id keys are
        missing; optional fields default to None.
        """
        variant_data = data.get("variant", {})
        fine_tune_data = data.get("fine_tune", {})
        fingerprint_data = data.get("behavioral_fingerprint")
        return ModelIdentity(
            base_model=data["base_model"],
            model_id=data["model_id"],
            variant=ModelVariant(**variant_data),
            fine_tune=FineTuneInfo(**fine_tune_data),
            weight_hash=data.get("weight_hash"),
            config_hash=data.get("config_hash"),
            tokenizer_hash=data.get("tokenizer_hash"),
            behavioral_fingerprint=BehavioralFingerprint(**fingerprint_data) if fingerprint_data else None,
            source_url=data.get("source_url"),
            source_revision=data.get("source_revision"),
            downloaded_at=data.get("downloaded_at"),
            parent_root=data.get("parent_root"),
            merkle_root=data.get("merkle_root"),
            created_at=data.get("created_at", time.time()),
            parameters=data.get("parameters"),
            context_length=data.get("context_length"),
            architecture=data.get("architecture"),
            license=data.get("license"),
        )

    def _save_identity(self, identity: ModelIdentity):
        """Save identity to disk as ``<merkle_root>.json``."""
        filename = f"{identity.merkle_root}.json"
        filepath = self.models_dir / filename
        filepath.write_text(json.dumps(identity.to_dict(), indent=2))

    def get_or_create(
        self,
        model_id: str,
        **kwargs,
    ) -> ModelIdentity:
        """
        Get existing model identity or create new one.

        If model already exists in registry, returns existing.
        Otherwise creates new identity linked to genesis or base model.

        NOTE(review): the lookup also matches on ``identity.base_model``,
        so asking for a base-model name can return an already-registered
        variant of it — confirm this aliasing is intended.
        """
        # Check if we have this model already
        for identity in self._cache.values():
            if identity.model_id == model_id or identity.base_model == model_id:
                return identity
        # Determine parent
        # If this is a variant, try to find base model
        parent_root = kwargs.pop("parent_root", None)
        if not parent_root:
            base = kwargs.get("base_model")
            if base:
                # Link to the unquantized registration of the same base
                for identity in self._cache.values():
                    if identity.base_model == base and identity.variant.quantization == "none":
                        parent_root = identity.merkle_root
                        break
        # Default to genesis
        if not parent_root:
            parent_root = self.genesis_root
        # Create new identity
        identity = create_model_identity(
            model_id=model_id,
            parent_root=parent_root,
            **kwargs,
        )
        # Cache and save
        self._cache[identity.merkle_root] = identity
        self._save_identity(identity)
        return identity

    def get_by_root(self, merkle_root: str) -> Optional[ModelIdentity]:
        """Get model identity by merkle root, or None if unknown."""
        return self._cache.get(merkle_root)

    def list_all(self) -> List[ModelIdentity]:
        """List all registered models."""
        return list(self._cache.values())

    def list_by_base(self, base_model: str) -> List[ModelIdentity]:
        """List all variants of a base model (exact base_model match)."""
        return [i for i in self._cache.values() if i.base_model == base_model]

    def search(self, query: str) -> List[ModelIdentity]:
        """Search models by case-insensitive substring of model_id or base_model."""
        query_lower = query.lower()
        return [
            i for i in self._cache.values()
            if query_lower in i.model_id.lower() or query_lower in i.base_model.lower()
        ]
# =============================================================================
# CLI
# =============================================================================
if __name__ == "__main__":
    # Smoke test: register a handful of model identities and print them.
    # (Removed an unused `import sys` that served no purpose here.)
    print("=== CASCADE Model Identity Layer ===\n")
    # Initialize registry (creates the models directory on first run)
    registry = ModelRegistry()
    # Sample IDs covering base models, quantized re-uploads, and API-only models
    test_models = [
        "meta-llama/Llama-3-8B",
        "TheBloke/Llama-3-8B-GGUF",
        "unsloth/Llama-3-8B-bnb-4bit",
        "anthropic/claude-3-opus",
        "openai/gpt-4",
    ]
    for model in test_models:
        identity = registry.get_or_create(model)
        print(f"Model: {identity.model_id}")
        print(f" Base: {identity.base_model}")
        print(f" Quant: {identity.variant.quantization}")
        print(f" Format: {identity.variant.format}")
        print(f" Merkle: {identity.merkle_root}")
        print(f" Parent: {identity.parent_root}")
        print()
    print(f"Total models in registry: {len(registry.list_all())}")