# cascade/identity.py
# tostido — Initial commit, cascade-lattice 0.5.4 (77bcbf1)
"""
CASCADE Model Identity Layer
Canonical identification for any AI model variant:
- Base models (meta-llama/Llama-3-8B)
- Quantizations (Q4_K_M, Q8_0, AWQ, GPTQ)
- Fine-tunes (LoRA, full, RLHF)
- API endpoints (behavioral fingerprinting)
Every unique model gets a node in the lattice.
Every observation links to its model's node.
The lattice becomes the collective memory of AI behavior.
"Same name, different model, different behavior."
"""
import hashlib
import json
import time
from pathlib import Path
from dataclasses import dataclass, field, asdict
from typing import Optional, List, Dict, Any
from enum import Enum
class ModelFormat(Enum):
    """Model weight formats.

    Identifies how a model's weights are stored on disk. Values are the
    lowercase strings that `detect_format` returns, so the two must stay
    in sync.
    """
    SAFETENSORS = "safetensors"
    PYTORCH = "pytorch"
    GGUF = "gguf"
    GGML = "ggml"
    ONNX = "onnx"
    TENSORRT = "tensorrt"
    OPENVINO = "openvino"
    COREML = "coreml"
    API = "api"  # No weights, just endpoint
    UNKNOWN = "unknown"
class QuantizationType(Enum):
    """Quantization methods.

    Values mirror the strings produced by `detect_quantization` and stored
    in `ModelVariant.quantization`.
    """
    NONE = "none"  # FP32/FP16/BF16 (unquantized)
    # llama.cpp / GGUF quantization schemes
    GGUF_Q4_0 = "Q4_0"
    GGUF_Q4_K_M = "Q4_K_M"
    GGUF_Q4_K_S = "Q4_K_S"
    GGUF_Q5_0 = "Q5_0"
    GGUF_Q5_K_M = "Q5_K_M"
    GGUF_Q5_K_S = "Q5_K_S"
    GGUF_Q6_K = "Q6_K"
    GGUF_Q8_0 = "Q8_0"
    # Post-training quantization toolkits
    GPTQ_4BIT = "GPTQ-4bit"
    GPTQ_8BIT = "GPTQ-8bit"
    AWQ_4BIT = "AWQ-4bit"
    BITSANDBYTES_4BIT = "bnb-4bit"
    BITSANDBYTES_8BIT = "bnb-8bit"
    # Generic integer quantization
    INT8 = "INT8"
    INT4 = "INT4"
    CUSTOM = "custom"
class FineTuneType(Enum):
    """Fine-tuning methods.

    Values are the strings stored in `FineTuneInfo.type`.
    """
    NONE = "none"
    LORA = "lora"    # Low-rank adapter
    QLORA = "qlora"  # Quantized LoRA
    FULL = "full"    # Full-parameter fine-tune
    RLHF = "rlhf"    # Reinforcement learning from human feedback
    DPO = "dpo"      # Direct preference optimization
    ORPO = "orpo"    # Odds-ratio preference optimization
    CUSTOM = "custom"
@dataclass
class ModelVariant:
    """Describes how a model differs from its base.

    All fields are plain strings/ints so the record serializes directly
    with `dataclasses.asdict`.
    """
    quantization: str = "none"      # typically a QuantizationType value, e.g. "Q4_K_M"
    format: str = "unknown"         # typically a ModelFormat value, e.g. "gguf"
    bits: Optional[int] = None      # effective bit width, if known
    provider: Optional[str] = None  # Who made this variant (e.g., "TheBloke")

    def to_dict(self) -> dict:
        """Return a plain-dict copy suitable for JSON serialization."""
        return asdict(self)
@dataclass
class FineTuneInfo:
    """Describes fine-tuning applied to a model.

    The default instance (type="none", everything else None) means the
    model has not been fine-tuned.
    """
    type: str = "none"                      # typically a FineTuneType value
    adapter_id: Optional[str] = None        # HuggingFace adapter ID
    adapter_hash: Optional[str] = None      # Hash of adapter weights
    base_model_root: Optional[str] = None   # Merkle root of base model identity
    dataset_id: Optional[str] = None        # Training dataset

    def to_dict(self) -> dict:
        """Return a plain-dict copy suitable for JSON serialization."""
        return asdict(self)
@dataclass
class BehavioralFingerprint:
    """
    Fingerprint for API models where weights are unavailable.
    Generated by running standard probes and hashing responses.

    See `generate_behavioral_fingerprint` for the producer of this record.
    """
    # One dict per probe: {"probe_id", "prompt_hash", "response_hash"}
    probe_responses: List[Dict[str, Any]] = field(default_factory=list)
    # Truncated SHA256 over the serialized probe_responses
    probe_hash: Optional[str] = None
    # Bump when the probe set changes, so fingerprints stay comparable
    fingerprint_version: int = 1
    generated_at: Optional[float] = None  # unix timestamp

    def to_dict(self) -> dict:
        """Return a plain-dict copy suitable for JSON serialization."""
        return asdict(self)
@dataclass
class ModelIdentity:
    """
    Canonical identity for any AI model variant.

    This is the node that goes in the lattice. All observations of this
    model link to this identity.
    """
    # === Core Identity ===
    base_model: str  # HuggingFace ID or canonical name
    model_id: str    # Full unique identifier (computed in __post_init__ if empty)
    # === Variant Info ===
    variant: ModelVariant = field(default_factory=ModelVariant)
    fine_tune: FineTuneInfo = field(default_factory=FineTuneInfo)
    # === Cryptographic Identity ===
    weight_hash: Optional[str] = None     # SHA256 of weights (if available)
    config_hash: Optional[str] = None     # SHA256 of model config
    tokenizer_hash: Optional[str] = None  # SHA256 of tokenizer
    # === Behavioral Fingerprint (for APIs) ===
    behavioral_fingerprint: Optional[BehavioralFingerprint] = None
    # === Source Info ===
    source_url: Optional[str] = None
    source_revision: Optional[str] = None  # Git commit/tag
    downloaded_at: Optional[float] = None
    # === Lattice Info ===
    parent_root: Optional[str] = None  # Genesis or base model's merkle root
    merkle_root: Optional[str] = None  # This identity's merkle root
    created_at: float = field(default_factory=time.time)
    # === Metadata ===
    parameters: Optional[int] = None  # Parameter count
    context_length: Optional[int] = None
    architecture: Optional[str] = None  # "llama", "mistral", "gpt", etc.
    license: Optional[str] = None

    def __post_init__(self):
        """Compute derived fields (model_id) when not supplied."""
        if not self.model_id:
            self.model_id = self.compute_model_id()

    def compute_model_id(self) -> str:
        """
        Compute canonical model ID from components.

        Format: base_model::variant_spec::fine_tune_spec
        Default-valued components ("none"/"unknown"/None) are omitted, so a
        plain base model's ID is just its base_model string.
        """
        parts = [self.base_model]
        # Add variant spec
        if self.variant.quantization != "none":
            parts.append(f"q:{self.variant.quantization}")
        if self.variant.format != "unknown":
            parts.append(f"fmt:{self.variant.format}")
        if self.variant.provider:
            parts.append(f"by:{self.variant.provider}")
        # Add fine-tune spec
        if self.fine_tune.type != "none":
            parts.append(f"ft:{self.fine_tune.type}")
        if self.fine_tune.adapter_id:
            parts.append(f"adapter:{self.fine_tune.adapter_id}")
        return "::".join(parts)

    def compute_merkle_root(self) -> str:
        """Compute, store, and return the merkle root of this identity.

        The root is a SHA256 over a canonical (sort_keys) JSON encoding of
        the identity's stable fields, truncated to 16 hex chars for compact
        IDs. Note that `created_at` is included, so two identities created
        at different times get different roots even if otherwise identical.
        """
        # Create canonical representation
        canonical = {
            "base_model": self.base_model,
            "model_id": self.model_id,
            "variant": self.variant.to_dict(),
            "fine_tune": self.fine_tune.to_dict(),
            "weight_hash": self.weight_hash,
            "config_hash": self.config_hash,
            "tokenizer_hash": self.tokenizer_hash,
            "parent_root": self.parent_root,
            "created_at": self.created_at,
        }
        # Add behavioral fingerprint if present (only the summary hash,
        # not the full probe list)
        if self.behavioral_fingerprint:
            canonical["behavioral_fingerprint"] = self.behavioral_fingerprint.probe_hash
        # Hash it
        canonical_json = json.dumps(canonical, sort_keys=True)
        self.merkle_root = hashlib.sha256(canonical_json.encode()).hexdigest()[:16]
        return self.merkle_root

    def finalize(self, parent_root: Optional[str] = None):
        """Finalize identity and compute merkle root.

        Args:
            parent_root: If given, overrides self.parent_root before hashing.

        Returns:
            self, for call chaining.
        """
        if parent_root:
            self.parent_root = parent_root
        self.merkle_root = self.compute_merkle_root()
        return self

    def to_dict(self) -> dict:
        """Convert to dictionary for serialization."""
        return {
            "base_model": self.base_model,
            "model_id": self.model_id,
            "variant": self.variant.to_dict(),
            "fine_tune": self.fine_tune.to_dict(),
            "weight_hash": self.weight_hash,
            "config_hash": self.config_hash,
            "tokenizer_hash": self.tokenizer_hash,
            "behavioral_fingerprint": self.behavioral_fingerprint.to_dict() if self.behavioral_fingerprint else None,
            "source_url": self.source_url,
            "source_revision": self.source_revision,
            "downloaded_at": self.downloaded_at,
            "parent_root": self.parent_root,
            "merkle_root": self.merkle_root,
            "created_at": self.created_at,
            "parameters": self.parameters,
            "context_length": self.context_length,
            "architecture": self.architecture,
            "license": self.license,
        }

    def to_chain_format(self) -> dict:
        """Convert to provenance chain format for lattice storage."""
        # Prefer the cryptographic weight hash; fall back to the behavioral
        # probe hash, then "unknown".
        # BUG FIX: the previous one-liner
        #   weight_hash or probe_hash if fingerprint else "unknown"
        # parsed as (weight_hash or probe_hash) if fingerprint else "unknown",
        # which discarded a valid weight_hash whenever no fingerprint existed.
        if self.weight_hash:
            model_hash = self.weight_hash
        elif self.behavioral_fingerprint and self.behavioral_fingerprint.probe_hash:
            model_hash = self.behavioral_fingerprint.probe_hash
        else:
            model_hash = "unknown"
        return {
            "session_id": f"model_identity_{self.merkle_root}",
            "model_id": self.model_id,
            "model_hash": model_hash,
            "input_hash": self.base_model,
            "output_hash": None,
            "records": {
                "identity": {
                    "layer_name": "identity",
                    "layer_idx": 0,
                    "state_hash": self.merkle_root,
                    "parent_hashes": [self.parent_root] if self.parent_root else [],
                    "params_hash": self.config_hash,
                    "shape": [self.parameters] if self.parameters else [0],
                    "dtype": "model_identity",
                    "stats": self.to_dict(),
                    "execution_order": 0,
                    "timestamp": self.created_at,
                }
            },
            "external_roots": [self.parent_root] if self.parent_root else [],
            "merkle_root": self.merkle_root,
            "created_at": self.created_at,
            "finalized": True,
        }
# =============================================================================
# STANDARD PROBES FOR BEHAVIORAL FINGERPRINTING
# =============================================================================
# Version-1 probe set for behavioral fingerprinting. Each entry supplies a
# prompt plus generation params for `generate_behavioral_fingerprint`; the
# optional "system" key carries a system prompt. All probes use
# temperature=0 so responses (and thus hashes) are as repeatable as the
# backend allows. Changing this list changes fingerprints — add a new
# versioned list instead of editing in place.
STANDARD_PROBES_V1 = [
    # Deterministic probes (temperature=0)
    {
        "id": "math_simple",
        "prompt": "What is 2+2? Answer with just the number.",
        "params": {"temperature": 0, "max_tokens": 10},
    },
    {
        "id": "capital_france",
        "prompt": "Complete this sentence with one word: The capital of France is",
        "params": {"temperature": 0, "max_tokens": 10},
    },
    {
        "id": "translate_hello",
        "prompt": "Translate to French: Hello",
        "params": {"temperature": 0, "max_tokens": 20},
    },
    {
        "id": "color_sky",
        "prompt": "What color is the sky on a clear day? One word answer:",
        "params": {"temperature": 0, "max_tokens": 10},
    },
    # Capability probes
    {
        "id": "code_simple",
        "prompt": "Write a Python function that adds two numbers. Just the function, no explanation.",
        "params": {"temperature": 0, "max_tokens": 100},
    },
    {
        "id": "reasoning",
        "prompt": "If all cats are mammals and all mammals are animals, are all cats animals? Answer yes or no.",
        "params": {"temperature": 0, "max_tokens": 10},
    },
    # System prompt probe
    {
        "id": "system_role",
        "prompt": "You are a helpful pirate. Say hello.",
        "params": {"temperature": 0, "max_tokens": 50},
        "system": "You are a helpful pirate who speaks like a pirate.",
    },
    # Edge cases
    {
        "id": "empty",
        "prompt": "",
        "params": {"temperature": 0, "max_tokens": 50},
    },
    {
        "id": "repetition",
        "prompt": "Repeat after me exactly: The quick brown fox",
        "params": {"temperature": 0, "max_tokens": 20},
    },
]
def generate_behavioral_fingerprint(
    call_fn,  # Function that takes (prompt, params) and returns response
    probes: List[dict] = None,
    version: int = 1,
) -> BehavioralFingerprint:
    """
    Generate behavioral fingerprint by running standard probes.

    Args:
        call_fn: Function to call the model. Signature: (prompt, params) -> str
        probes: List of probe configs. Defaults to STANDARD_PROBES_V1.
        version: Fingerprint version number.

    Returns:
        BehavioralFingerprint with hashed responses. A probe that raises is
        recorded as "error:<ExceptionName>" rather than aborting the run.
    """
    probe_set = STANDARD_PROBES_V1 if probes is None else probes
    results = []
    for spec in probe_set:
        prompt = spec["prompt"]
        try:
            reply = call_fn(prompt, spec.get("params", {}))
        except Exception as exc:
            # Fold the failure into the fingerprint instead of raising
            digest = f"error:{type(exc).__name__}"
        else:
            digest = hashlib.sha256(str(reply).encode()).hexdigest()[:16]
        results.append({
            "probe_id": spec["id"],
            "prompt_hash": hashlib.sha256(prompt.encode()).hexdigest()[:16],
            "response_hash": digest,
        })
    # Overall fingerprint hash over the canonical serialized results
    combined = hashlib.sha256(
        json.dumps(results, sort_keys=True).encode()
    ).hexdigest()[:16]
    return BehavioralFingerprint(
        probe_responses=results,
        probe_hash=combined,
        fingerprint_version=version,
        generated_at=time.time(),
    )
# =============================================================================
# MODEL IDENTITY FACTORY
# =============================================================================
def detect_quantization(model_path: str) -> str:
    """Detect quantization from model path or name.

    Args:
        model_path: HuggingFace ID, file path, or model name.

    Returns:
        A QuantizationType-style string (e.g. "Q4_K_M", "GPTQ-4bit",
        "AWQ-4bit", "bnb-4bit"), bare "GPTQ" when the bit width is
        unmarked, or "none" when nothing recognizable is found.
    """
    path_lower = model_path.lower()
    # GGUF quantizations: K-variants are checked before the plain ones so
    # a longer pattern is never shadowed by a shorter one.
    for q in ["q4_k_m", "q4_k_s", "q4_0", "q5_k_m", "q5_k_s", "q5_0", "q6_k", "q8_0"]:
        if q in path_lower:
            return q.upper()
    # GPTQ: only trust an explicit "4bit"/"8bit" marker.
    # BUG FIX: the old "-4b"/"-8b" shorthand collided with parameter-count
    # tokens, so "TheBloke/Llama-3-8B-GPTQ" was misdetected as 8-bit.
    if "gptq" in path_lower:
        if "4bit" in path_lower:
            return "GPTQ-4bit"
        if "8bit" in path_lower:
            return "GPTQ-8bit"
        return "GPTQ"
    # AWQ is 4-bit in practice
    if "awq" in path_lower:
        return "AWQ-4bit"
    # BitsAndBytes: 4-bit when marked, otherwise assume 8-bit
    if "bnb" in path_lower or "bitsandbytes" in path_lower:
        if "4bit" in path_lower:
            return "bnb-4bit"
        return "bnb-8bit"
    return "none"
def detect_format(model_path: str) -> str:
    """Detect model format from path.

    Returns the first matching ModelFormat-style string, or "unknown".
    """
    lowered = model_path.lower()
    # Ordered marker table: extension-style markers are tested before the
    # looser substring heuristics, and the first hit wins.
    marker_table = (
        ("gguf", (".gguf",)),
        ("ggml", (".ggml",)),
        ("safetensors", (".safetensors", "safetensors")),
        ("onnx", (".onnx",)),
        ("pytorch", (".bin", "pytorch")),
        ("api", ("api", "http")),
    )
    for fmt, markers in marker_table:
        if any(marker in lowered for marker in markers):
            return fmt
    return "unknown"
def detect_provider(model_path: str) -> Optional[str]:
    """Detect who made this variant.

    Matches against a fixed list of well-known re-packagers; returns the
    first (lowercase) match, or None when no known provider appears.
    """
    lowered = model_path.lower()
    known_providers = (
        "thebloke",
        "unsloth",
        "mlx-community",
        "bartowski",
        "mradermacher",
        "turboderp",
    )
    return next((name for name in known_providers if name in lowered), None)
def create_model_identity(
    model_id: str,
    weights_path: Optional[Path] = None,
    config: Optional[dict] = None,
    parent_root: Optional[str] = None,
    behavioral_fingerprint: Optional[BehavioralFingerprint] = None,
    **kwargs,
) -> ModelIdentity:
    """
    Factory function to create ModelIdentity from various inputs.

    Args:
        model_id: HuggingFace model ID or local path
        weights_path: Path to weights file (for hashing)
        config: Model config dict
        parent_root: Merkle root of parent (genesis or base model)
        behavioral_fingerprint: Pre-computed fingerprint for APIs
        **kwargs: Additional fields (base_model, parameters, context_length, etc.)

    Returns:
        Finalized ModelIdentity ready for lattice
    """
    # Parse base model from full ID when not given explicitly.
    # Note: the org prefix is dropped, e.g.
    # "TheBloke/Llama-3-8B-GGUF" -> base is "Llama-3-8B"
    base_model = kwargs.pop("base_model", None)
    if not base_model:
        parts = model_id.split("/")
        if len(parts) >= 2:
            name = parts[-1]
            # Strip common variant suffixes (removed anywhere in the name,
            # so "...-GGUF-v2" is also caught)
            for suffix in ["-GGUF", "-GPTQ", "-AWQ", "-fp16", "-bf16", "-GGML"]:
                name = name.replace(suffix, "")
            base_model = name
        else:
            base_model = model_id

    # Detect variant info from the name alone
    quantization = detect_quantization(model_id)
    format_type = detect_format(model_id)
    provider = detect_provider(model_id)

    # Extract bits from the quantization label (first matching digit wins)
    bits = None
    if "4" in quantization:
        bits = 4
    elif "5" in quantization:
        bits = 5
    elif "6" in quantization:
        bits = 6
    elif "8" in quantization:
        bits = 8
    variant = ModelVariant(
        quantization=quantization,
        format=format_type,
        bits=bits,
        provider=provider,
    )

    # Hash weights if available. For large files, hash size + first 1MB +
    # last 1MB as a cheap content fingerprint.
    weight_hash = None
    if weights_path and Path(weights_path).exists():
        path = Path(weights_path)
        size = path.stat().st_size
        hasher = hashlib.sha256()
        hasher.update(str(size).encode())
        with open(path, "rb") as f:
            # First 1MB
            hasher.update(f.read(1024 * 1024))
            if size > 2 * 1024 * 1024:
                # Large file: skip the middle and hash only the last 1MB
                f.seek(-1024 * 1024, 2)
            # BUG FIX: previously files between 1MB and 2MB had their tail
            # silently excluded (the seek branch only fired above 2MB and
            # nothing was read otherwise). Reading the remainder here fully
            # hashes small files and leaves >2MB hashes unchanged.
            hasher.update(f.read())
        weight_hash = hasher.hexdigest()[:16]

    # Hash config if available
    config_hash = None
    if config:
        config_json = json.dumps(config, sort_keys=True)
        config_hash = hashlib.sha256(config_json.encode()).hexdigest()[:16]

    # Create identity
    identity = ModelIdentity(
        base_model=base_model,
        model_id="",  # Will be computed
        variant=variant,
        fine_tune=FineTuneInfo(),
        weight_hash=weight_hash,
        config_hash=config_hash,
        behavioral_fingerprint=behavioral_fingerprint,
        parent_root=parent_root,
        **kwargs,
    )
    # Compute model_id and merkle_root
    identity.model_id = identity.compute_model_id()
    identity.finalize(parent_root)
    return identity
# =============================================================================
# MODEL REGISTRY (Lattice Integration)
# =============================================================================
class ModelRegistry:
    """
    Registry of model identities in the lattice.

    Persists one JSON file per identity under ``<lattice_dir>/models`` and
    keeps an in-memory cache keyed by merkle root.

    Provides:
    - Get or create model identity
    - Link observations to model identities
    - Query models by various criteria
    """

    def __init__(self, lattice_dir: Path = None, genesis_root: str = None):
        """Set up storage directories and load all stored identities.

        Args:
            lattice_dir: Lattice root; defaults to ``../lattice`` relative
                to this file. Created on demand.
            genesis_root: Parent merkle root for models with no base model.
        """
        self.lattice_dir = lattice_dir or Path(__file__).parent.parent / "lattice"
        self.models_dir = self.lattice_dir / "models"
        self.models_dir.mkdir(parents=True, exist_ok=True)
        # Genesis root (models link to this if no base model)
        # NOTE(review): hard-coded fallback looks like a well-known genesis
        # hash — confirm it matches the deployed lattice.
        self.genesis_root = genesis_root or "89f940c1a4b7aa65"
        # Cache of loaded identities, keyed by merkle root
        self._cache: Dict[str, ModelIdentity] = {}
        self._load_all()

    def _load_all(self):
        """Load all model identities from disk into the cache."""
        for json_file in self.models_dir.glob("*.json"):
            try:
                data = json.loads(json_file.read_text())
                identity = self._dict_to_identity(data)
                self._cache[identity.merkle_root] = identity
            except Exception as e:
                # Best-effort load: a corrupt/unreadable file is reported
                # and skipped rather than aborting startup.
                print(f"Error loading {json_file}: {e}")

    def _dict_to_identity(self, data: dict) -> ModelIdentity:
        """Convert dict (as produced by ModelIdentity.to_dict) back to ModelIdentity.

        Raises KeyError if the required base_model/model_id keys are
        missing; optional fields default to None.
        """
        variant_data = data.get("variant", {})
        fine_tune_data = data.get("fine_tune", {})
        fingerprint_data = data.get("behavioral_fingerprint")
        return ModelIdentity(
            base_model=data["base_model"],
            model_id=data["model_id"],
            variant=ModelVariant(**variant_data),
            fine_tune=FineTuneInfo(**fine_tune_data),
            weight_hash=data.get("weight_hash"),
            config_hash=data.get("config_hash"),
            tokenizer_hash=data.get("tokenizer_hash"),
            behavioral_fingerprint=BehavioralFingerprint(**fingerprint_data) if fingerprint_data else None,
            source_url=data.get("source_url"),
            source_revision=data.get("source_revision"),
            downloaded_at=data.get("downloaded_at"),
            parent_root=data.get("parent_root"),
            merkle_root=data.get("merkle_root"),
            created_at=data.get("created_at", time.time()),
            parameters=data.get("parameters"),
            context_length=data.get("context_length"),
            architecture=data.get("architecture"),
            license=data.get("license"),
        )

    def _save_identity(self, identity: ModelIdentity):
        """Save identity to disk as ``<merkle_root>.json``."""
        filename = f"{identity.merkle_root}.json"
        filepath = self.models_dir / filename
        filepath.write_text(json.dumps(identity.to_dict(), indent=2))

    def get_or_create(
        self,
        model_id: str,
        **kwargs,
    ) -> ModelIdentity:
        """
        Get existing model identity or create new one.

        If model already exists in registry, returns existing.
        Otherwise creates new identity linked to genesis or base model.

        NOTE(review): the lookup also matches on ``identity.base_model``,
        so asking for a base-model name can return an already-registered
        variant of it — confirm this aliasing is intended.
        """
        # Check if we have this model already
        for identity in self._cache.values():
            if identity.model_id == model_id or identity.base_model == model_id:
                return identity
        # Determine parent
        # If this is a variant, try to find base model
        parent_root = kwargs.pop("parent_root", None)
        if not parent_root:
            base = kwargs.get("base_model")
            if base:
                # Link to the unquantized registration of the same base
                for identity in self._cache.values():
                    if identity.base_model == base and identity.variant.quantization == "none":
                        parent_root = identity.merkle_root
                        break
        # Default to genesis
        if not parent_root:
            parent_root = self.genesis_root
        # Create new identity
        identity = create_model_identity(
            model_id=model_id,
            parent_root=parent_root,
            **kwargs,
        )
        # Cache and save
        self._cache[identity.merkle_root] = identity
        self._save_identity(identity)
        return identity

    def get_by_root(self, merkle_root: str) -> Optional[ModelIdentity]:
        """Get model identity by merkle root, or None if unknown."""
        return self._cache.get(merkle_root)

    def list_all(self) -> List[ModelIdentity]:
        """List all registered models."""
        return list(self._cache.values())

    def list_by_base(self, base_model: str) -> List[ModelIdentity]:
        """List all variants of a base model (exact base_model match)."""
        return [i for i in self._cache.values() if i.base_model == base_model]

    def search(self, query: str) -> List[ModelIdentity]:
        """Search models by case-insensitive substring of model_id or base_model."""
        query_lower = query.lower()
        return [
            i for i in self._cache.values()
            if query_lower in i.model_id.lower() or query_lower in i.base_model.lower()
        ]
# =============================================================================
# CLI
# =============================================================================
if __name__ == "__main__":
    # Smoke test: register a handful of model identities and print them.
    # (Removed an unused `import sys` that served no purpose here.)
    print("=== CASCADE Model Identity Layer ===\n")
    # Initialize registry (creates the models directory on first run)
    registry = ModelRegistry()
    # Sample IDs covering base models, quantized re-uploads, and API-only models
    test_models = [
        "meta-llama/Llama-3-8B",
        "TheBloke/Llama-3-8B-GGUF",
        "unsloth/Llama-3-8B-bnb-4bit",
        "anthropic/claude-3-opus",
        "openai/gpt-4",
    ]
    for model in test_models:
        identity = registry.get_or_create(model)
        print(f"Model: {identity.model_id}")
        print(f" Base: {identity.base_model}")
        print(f" Quant: {identity.variant.quantization}")
        print(f" Format: {identity.variant.format}")
        print(f" Merkle: {identity.merkle_root}")
        print(f" Parent: {identity.parent_root}")
        print()
    print(f"Total models in registry: {len(registry.list_all())}")